diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index d64f36cf74e..cfe0075986f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -12,18 +12,6 @@
 (AbsInt64x2 ...) => (VPABSQ128 ...)
 (AbsInt64x4 ...) => (VPABSQ256 ...)
 (AbsInt64x8 ...) => (VPABSQ512 ...)
-(AbsMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM mask))
-(AbsMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM mask))
-(AbsMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM mask))
-(AbsMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM mask))
-(AbsMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM mask))
-(AbsMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM mask))
-(AbsMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM mask))
-(AbsMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM mask))
-(AbsMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM mask))
-(AbsMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM mask))
-(AbsMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM mask))
-(AbsMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM mask))
 (AddFloat32x4 ...) => (VADDPS128 ...)
 (AddFloat32x8 ...) => (VADDPS256 ...)
 (AddFloat32x16 ...) => (VADDPS512 ...)
@@ -57,51 +45,12 @@
 (AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
 (AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
 (AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
-(AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask))
-(AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask))
-(AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask))
 (AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
 (AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
 (AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(AddDotProdQuadrupleMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask))
-(AddDotProdQuadrupleMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask))
-(AddDotProdQuadrupleMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask))
 (AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
 (AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
 (AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
-(AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask))
-(AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask))
-(AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask))
-(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM mask))
-(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM mask))
-(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask))
-(AddMaskedFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM mask))
-(AddMaskedFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM mask))
-(AddMaskedFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM mask))
-(AddMaskedInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask))
-(AddMaskedInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask))
-(AddMaskedInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask))
-(AddMaskedInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask))
-(AddMaskedInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask))
-(AddMaskedInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask))
-(AddMaskedInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask))
-(AddMaskedInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask))
-(AddMaskedInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask))
-(AddMaskedInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask))
-(AddMaskedInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask))
-(AddMaskedInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask))
-(AddMaskedUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask))
-(AddMaskedUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask))
-(AddMaskedUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask))
-(AddMaskedUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask))
-(AddMaskedUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask))
-(AddMaskedUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask))
-(AddMaskedUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask))
-(AddMaskedUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask))
-(AddMaskedUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask))
-(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask))
-(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask))
-(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask))
 (AddPairsFloat32x4 ...) => (VHADDPS128 ...)
 (AddPairsFloat32x8 ...) => (VHADDPS256 ...)
 (AddPairsFloat64x2 ...) => (VHADDPD128 ...)
@@ -128,18 +77,6 @@
 (AddSaturatedUint16x8 ...) => (VPADDUSW128 ...)
 (AddSaturatedUint16x16 ...) => (VPADDUSW256 ...)
 (AddSaturatedUint16x32 ...) => (VPADDUSW512 ...)
-(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) -(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) -(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) -(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) -(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) -(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) -(AddSaturatedMaskedUint8x16 x y mask) => (VPADDUSBMasked128 x y (VPMOVVec8x16ToM mask)) -(AddSaturatedMaskedUint8x32 x y mask) => (VPADDUSBMasked256 x y (VPMOVVec8x32ToM mask)) -(AddSaturatedMaskedUint8x64 x y mask) => (VPADDUSBMasked512 x y (VPMOVVec8x64ToM mask)) -(AddSaturatedMaskedUint16x8 x y mask) => (VPADDUSWMasked128 x y (VPMOVVec16x8ToM mask)) -(AddSaturatedMaskedUint16x16 x y mask) => (VPADDUSWMasked256 x y (VPMOVVec16x16ToM mask)) -(AddSaturatedMaskedUint16x32 x y mask) => (VPADDUSWMasked512 x y (VPMOVVec16x32ToM mask)) (AddSubFloat32x4 ...) => (VADDSUBPS128 ...) (AddSubFloat32x8 ...) => (VADDSUBPS256 ...) (AddSubFloat64x2 ...) => (VADDSUBPD128 ...) @@ -168,18 +105,6 @@ (AndUint64x2 ...) => (VPAND128 ...) (AndUint64x4 ...) => (VPAND256 ...) (AndUint64x8 ...) => (VPANDQ512 ...) -(AndMaskedInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndMaskedInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndMaskedInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndMaskedInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndMaskedInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndMaskedInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) -(AndMaskedUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndMaskedUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndMaskedUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndMaskedUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndMaskedUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndMaskedUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) (AndNotInt8x16 ...) => (VPANDN128 ...) (AndNotInt8x32 ...) => (VPANDN256 ...) (AndNotInt8x64 ...) => (VPANDND512 ...) @@ -204,30 +129,12 @@ (AndNotUint64x2 ...) => (VPANDN128 ...) (AndNotUint64x4 ...) => (VPANDN256 ...) (AndNotUint64x8 ...) => (VPANDNQ512 ...) -(AndNotMaskedInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndNotMaskedInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndNotMaskedInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndNotMaskedInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndNotMaskedInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndNotMaskedInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) -(AndNotMaskedUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndNotMaskedUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndNotMaskedUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) (AverageUint8x16 ...) => (VPAVGB128 ...) 
(AverageUint8x32 ...) => (VPAVGB256 ...) (AverageUint8x64 ...) => (VPAVGB512 ...) (AverageUint16x8 ...) => (VPAVGW128 ...) (AverageUint16x16 ...) => (VPAVGW256 ...) (AverageUint16x32 ...) => (VPAVGW512 ...) -(AverageMaskedUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) -(AverageMaskedUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) -(AverageMaskedUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) -(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) -(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) -(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) (Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...) (Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...) (Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...) @@ -238,16 +145,6 @@ (Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...) (Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...) (Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...) -(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM mask)) -(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) -(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) -(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) -(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) -(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) -(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) -(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) -(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) -(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) (Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...) (Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...) (Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...) @@ -258,16 +155,6 @@ (Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...) (Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...) (Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...) -(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM mask)) -(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM mask)) -(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) -(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) -(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) -(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) -(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) -(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) -(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) -(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) (Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...) (Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...) (Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...) @@ -278,16 +165,6 @@ (Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...) (Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...) (Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...) 
-(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM mask)) -(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM mask)) -(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) -(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) -(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) -(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM mask)) -(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) -(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) -(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) -(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM mask)) (CeilFloat32x4 x) => (VROUNDPS128 [2] x) (CeilFloat32x8 x) => (VROUNDPS256 [2] x) (CeilFloat64x2 x) => (VROUNDPD128 [2] x) @@ -298,24 +175,12 @@ (CeilScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x) (CeilScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x) (CeilScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x) -(CeilScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) -(CeilScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) -(CeilScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) -(CeilScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) -(CeilScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) -(CeilScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) (CeilScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x) (CeilScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x) (CeilScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x) (CeilScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x) (CeilScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x) (CeilScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x) -(CeilScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) -(CeilScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) -(CeilScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) -(CeilScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) -(CeilScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) -(CeilScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) (CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask)) (CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM mask)) (CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM mask)) @@ -349,15 +214,9 @@ (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...) (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...) (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...) -(ConvertToInt32MaskedFloat32x4 x mask) => (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM mask)) -(ConvertToInt32MaskedFloat32x8 x mask) => (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM mask)) -(ConvertToInt32MaskedFloat32x16 x mask) => (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM mask)) (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...) (ConvertToUint32Float32x8 ...) 
=> (VCVTPS2UDQ256 ...) (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...) -(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM mask)) -(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM mask)) -(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM mask)) (CopySignInt8x16 ...) => (VPSIGNB128 ...) (CopySignInt8x32 ...) => (VPSIGNB256 ...) (CopySignInt16x8 ...) => (VPSIGNW128 ...) @@ -370,24 +229,12 @@ (DivFloat64x2 ...) => (VDIVPD128 ...) (DivFloat64x4 ...) => (VDIVPD256 ...) (DivFloat64x8 ...) => (VDIVPD512 ...) -(DivMaskedFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) -(DivMaskedFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) -(DivMaskedFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) -(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) -(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) -(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) (DotProdPairsInt16x8 ...) => (VPMADDWD128 ...) (DotProdPairsInt16x16 ...) => (VPMADDWD256 ...) (DotProdPairsInt16x32 ...) => (VPMADDWD512 ...) -(DotProdPairsMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) -(DotProdPairsMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) -(DotProdPairsMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) (DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...) (DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...) (DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...) -(DotProdPairsSaturatedMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) -(DotProdPairsSaturatedMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) -(DotProdPairsSaturatedMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y) (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) @@ -418,36 +265,6 @@ (EqualUint64x2 ...) => (VPCMPEQQ128 ...) (EqualUint64x4 ...) => (VPCMPEQQ256 ...) 
(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) -(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(EqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(EqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(EqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(EqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) -(EqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) -(EqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(EqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(EqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(EqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(EqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(EqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(EqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(EqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(EqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(EqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(EqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) -(EqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) -(EqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(EqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(EqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(EqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(EqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(EqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(EqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) (ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM mask)) (ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM mask)) (ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM mask)) @@ -488,42 +305,21 @@ (FloorScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x) (FloorScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x) (FloorScaledFloat64x8 [a] x) => (VRNDSCALEPD512 
[a+1] x) -(FloorScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) -(FloorScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) -(FloorScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) -(FloorScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) -(FloorScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) -(FloorScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (FloorScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x) (FloorScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x) (FloorScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x) (FloorScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x) (FloorScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x) (FloorScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x) -(FloorScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) -(FloorScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) -(FloorScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) -(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) -(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) -(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...) (GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...) (GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...) (GaloisFieldAffineTransformInverseUint8x16 ...) => (VGF2P8AFFINEINVQB128 ...) (GaloisFieldAffineTransformInverseUint8x32 ...) => (VGF2P8AFFINEINVQB256 ...) (GaloisFieldAffineTransformInverseUint8x64 ...) => (VGF2P8AFFINEINVQB512 ...) -(GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) -(GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) -(GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) -(GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) -(GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) -(GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...) (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...) (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...) -(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) -(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) -(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) (GetElemFloat32x4 ...) => (VPEXTRD128 ...) (GetElemFloat64x2 ...) => (VPEXTRQ128 ...) (GetElemInt8x16 ...) => (VPEXTRB128 ...) 
@@ -610,78 +406,12 @@ (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) -(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(GreaterEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(GreaterEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(GreaterEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(GreaterEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(GreaterEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) -(GreaterEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) -(GreaterEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) -(GreaterEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) -(GreaterEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) -(GreaterEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) -(GreaterEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(GreaterEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(GreaterEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(GreaterEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(GreaterEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(GreaterEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(GreaterEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) -(GreaterEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) -(GreaterEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) -(GreaterEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) -(GreaterEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) -(GreaterEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) -(GreaterEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(GreaterEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(GreaterEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(GreaterEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(GreaterEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(GreaterEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM 
mask))) -(GreaterMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(GreaterMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(GreaterMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(GreaterMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(GreaterMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(GreaterMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(GreaterMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) -(GreaterMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) -(GreaterMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) -(GreaterMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) -(GreaterMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) -(GreaterMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) -(GreaterMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(GreaterMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(GreaterMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(GreaterMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(GreaterMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(GreaterMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(GreaterMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) -(GreaterMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) -(GreaterMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) -(GreaterMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) -(GreaterMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) -(GreaterMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) -(GreaterMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(GreaterMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(GreaterMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(GreaterMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(GreaterMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(GreaterMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) (IsNanFloat32x4 x y) => (VCMPPS128 [3] x y) (IsNanFloat32x8 x y) => (VCMPPS256 [3] x y) (IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) (IsNanFloat64x2 x y) => (VCMPPD128 [3] x y) (IsNanFloat64x4 x y) => (VCMPPD256 [3] x y) (IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) -(IsNanMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 
(VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) -(IsNanMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) -(IsNanMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) -(IsNanMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) -(IsNanMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) -(IsNanMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) (LessFloat32x4 x y) => (VCMPPS128 [1] x y) (LessFloat32x8 x y) => (VCMPPS256 [1] x y) (LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) @@ -710,66 +440,6 @@ (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) -(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(LessEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(LessEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(LessEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(LessEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(LessEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) -(LessEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) -(LessEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) -(LessEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) -(LessEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) -(LessEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) -(LessEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(LessEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(LessEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(LessEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(LessEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(LessEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(LessEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) -(LessEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) -(LessEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) -(LessEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) -(LessEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) -(LessEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) -(LessEqualMaskedUint32x4 x y mask) => 
(VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(LessEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(LessEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(LessEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(LessEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(LessEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(LessMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(LessMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(LessMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(LessMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(LessMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(LessMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(LessMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) -(LessMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) -(LessMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) -(LessMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) -(LessMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) -(LessMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) -(LessMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(LessMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(LessMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(LessMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(LessMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(LessMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(LessMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) -(LessMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) -(LessMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) -(LessMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) -(LessMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) -(LessMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) -(LessMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(LessMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(LessMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(LessMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(LessMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM 
mask))) -(LessMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) (MaxFloat32x4 ...) => (VMAXPS128 ...) (MaxFloat32x8 ...) => (VMAXPS256 ...) (MaxFloat32x16 ...) => (VMAXPS512 ...) @@ -800,36 +470,6 @@ (MaxUint64x2 ...) => (VPMAXUQ128 ...) (MaxUint64x4 ...) => (VPMAXUQ256 ...) (MaxUint64x8 ...) => (VPMAXUQ512 ...) -(MaxMaskedFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaxMaskedFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaxMaskedFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaxMaskedFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaxMaskedFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaxMaskedFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaxMaskedInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaxMaskedInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaxMaskedInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaxMaskedInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaxMaskedInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaxMaskedInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaxMaskedInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaxMaskedInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaxMaskedInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaxMaskedInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaxMaskedInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaxMaskedInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaxMaskedUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaxMaskedUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaxMaskedUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaxMaskedUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaxMaskedUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaxMaskedUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaxMaskedUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaxMaskedUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaxMaskedUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaxMaskedUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaxMaskedUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaxMaskedUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) (MinFloat32x4 ...) => (VMINPS128 ...) (MinFloat32x8 ...) => (VMINPS256 ...) (MinFloat32x16 ...) => (VMINPS512 ...) @@ -860,36 +500,6 @@ (MinUint64x2 ...) => (VPMINUQ128 ...) (MinUint64x4 ...) => (VPMINUQ256 ...) (MinUint64x8 ...) => (VPMINUQ512 ...) 
-(MinMaskedFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MinMaskedFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MinMaskedFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MinMaskedFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MinMaskedFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MinMaskedFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MinMaskedInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MinMaskedInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MinMaskedInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MinMaskedInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MinMaskedInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MinMaskedInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MinMaskedInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) -(MinMaskedInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) -(MinMaskedInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) -(MinMaskedInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) -(MinMaskedInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) -(MinMaskedInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) -(MinMaskedUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) -(MinMaskedUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) -(MinMaskedUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) -(MinMaskedUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MinMaskedUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MinMaskedUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MinMaskedUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) -(MinMaskedUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) -(MinMaskedUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) -(MinMaskedUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) -(MinMaskedUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) -(MinMaskedUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) (MulFloat32x4 ...) => (VMULPS128 ...) (MulFloat32x8 ...) => (VMULPS256 ...) (MulFloat32x16 ...) => (VMULPS512 ...) @@ -920,24 +530,12 @@ (MulAddFloat64x2 ...) => (VFMADD213PD128 ...) (MulAddFloat64x4 ...) => (VFMADD213PD256 ...) (MulAddFloat64x8 ...) => (VFMADD213PD512 ...) -(MulAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MulAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MulAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MulAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MulAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MulAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (MulAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...) (MulAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...) (MulAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...) (MulAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...) (MulAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...) (MulAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...) 
-(MulAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MulAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MulAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MulAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MulAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MulAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...) (MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...) (MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...) @@ -948,48 +546,12 @@ (MulHighUint16x8 ...) => (VPMULHUW128 ...) (MulHighUint16x16 ...) => (VPMULHUW256 ...) (MulHighUint16x32 ...) => (VPMULHUW512 ...) -(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MulMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) -(MulMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) -(MulMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) -(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MulMaskedUint16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulMaskedUint16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulMaskedUint16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulMaskedUint32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) -(MulMaskedUint32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) -(MulMaskedUint32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) -(MulMaskedUint64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MulMaskedUint64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MulMaskedUint64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) (MulSubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...) (MulSubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...) (MulSubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...) (MulSubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...) (MulSubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...) 
(MulSubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...) -(MulSubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MulSubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MulSubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MulSubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MulSubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MulSubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y) (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y) (NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) @@ -1004,36 +566,6 @@ (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) -(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(NotEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(NotEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(NotEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(NotEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(NotEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) -(NotEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) -(NotEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) -(NotEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) -(NotEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) -(NotEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) -(NotEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(NotEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(NotEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(NotEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(NotEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(NotEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(NotEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) -(NotEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) -(NotEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) -(NotEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) -(NotEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) -(NotEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM 
mask))) -(NotEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(NotEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(NotEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) (OnesCountInt8x16 ...) => (VPOPCNTB128 ...) (OnesCountInt8x32 ...) => (VPOPCNTB256 ...) (OnesCountInt8x64 ...) => (VPOPCNTB512 ...) @@ -1058,30 +590,6 @@ (OnesCountUint64x2 ...) => (VPOPCNTQ128 ...) (OnesCountUint64x4 ...) => (VPOPCNTQ256 ...) (OnesCountUint64x8 ...) => (VPOPCNTQ512 ...) -(OnesCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) -(OnesCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) -(OnesCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) -(OnesCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) -(OnesCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) -(OnesCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) -(OnesCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) -(OnesCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) -(OnesCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) -(OnesCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) -(OnesCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) -(OnesCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) -(OnesCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) -(OnesCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) -(OnesCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) -(OnesCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) -(OnesCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) -(OnesCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) -(OnesCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) -(OnesCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) -(OnesCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) -(OnesCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) -(OnesCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) -(OnesCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) (OrInt8x16 ...) => (VPOR128 ...) (OrInt8x32 ...) => (VPOR256 ...) (OrInt8x64 ...) => (VPORD512 ...) @@ -1106,18 +614,6 @@ (OrUint64x2 ...) => (VPOR128 ...) (OrUint64x4 ...) => (VPOR256 ...) (OrUint64x8 ...) => (VPORQ512 ...) 
-(OrMaskedInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) -(OrMaskedInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) -(OrMaskedInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) -(OrMaskedInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) -(OrMaskedInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) -(OrMaskedInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) -(OrMaskedUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) -(OrMaskedUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) -(OrMaskedUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) -(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) -(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) -(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) (PermuteFloat32x8 ...) => (VPERMPS256 ...) (PermuteFloat32x16 ...) => (VPERMPS512 ...) (PermuteFloat64x4 ...) => (VPERMPD256 ...) @@ -1172,84 +668,18 @@ (Permute2Uint64x2 ...) => (VPERMI2Q128 ...) (Permute2Uint64x4 ...) => (VPERMI2Q256 ...) (Permute2Uint64x8 ...) => (VPERMI2Q512 ...) -(Permute2MaskedFloat32x4 x y z mask) => (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(Permute2MaskedFloat32x8 x y z mask) => (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(Permute2MaskedFloat32x16 x y z mask) => (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(Permute2MaskedFloat64x2 x y z mask) => (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(Permute2MaskedFloat64x4 x y z mask) => (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(Permute2MaskedFloat64x8 x y z mask) => (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) -(Permute2MaskedInt8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) -(Permute2MaskedInt8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) -(Permute2MaskedInt16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) -(Permute2MaskedInt16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) -(Permute2MaskedInt16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) -(Permute2MaskedInt32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) -(Permute2MaskedInt32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) -(Permute2MaskedInt32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) -(Permute2MaskedInt64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) -(Permute2MaskedInt64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) -(Permute2MaskedInt64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) -(Permute2MaskedUint8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) -(Permute2MaskedUint8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) -(Permute2MaskedUint8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) -(Permute2MaskedUint16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) -(Permute2MaskedUint16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) -(Permute2MaskedUint16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) -(Permute2MaskedUint32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) -(Permute2MaskedUint32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) -(Permute2MaskedUint32x16 x y z 
mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) -(Permute2MaskedUint64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) -(Permute2MaskedUint64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) -(Permute2MaskedUint64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) -(PermuteMaskedFloat32x8 x y mask) => (VPERMPSMasked256 x y (VPMOVVec32x8ToM mask)) -(PermuteMaskedFloat32x16 x y mask) => (VPERMPSMasked512 x y (VPMOVVec32x16ToM mask)) -(PermuteMaskedFloat64x4 x y mask) => (VPERMPDMasked256 x y (VPMOVVec64x4ToM mask)) -(PermuteMaskedFloat64x8 x y mask) => (VPERMPDMasked512 x y (VPMOVVec64x8ToM mask)) -(PermuteMaskedInt8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) -(PermuteMaskedInt8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) -(PermuteMaskedInt8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) -(PermuteMaskedInt16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) -(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) -(PermuteMaskedInt16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) -(PermuteMaskedInt32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) -(PermuteMaskedInt32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) -(PermuteMaskedInt64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) -(PermuteMaskedInt64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) -(PermuteMaskedUint8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) -(PermuteMaskedUint8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) -(PermuteMaskedUint8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) -(PermuteMaskedUint16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) -(PermuteMaskedUint16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) -(PermuteMaskedUint16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) -(PermuteMaskedUint32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) -(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) -(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) -(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) (ReciprocalFloat32x4 ...) => (VRCPPS128 ...) (ReciprocalFloat32x8 ...) => (VRCPPS256 ...) (ReciprocalFloat32x16 ...) => (VRCP14PS512 ...) (ReciprocalFloat64x2 ...) => (VRCP14PD128 ...) (ReciprocalFloat64x4 ...) => (VRCP14PD256 ...) (ReciprocalFloat64x8 ...) => (VRCP14PD512 ...) -(ReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) -(ReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) -(ReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) -(ReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) -(ReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) -(ReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) (ReciprocalSqrtFloat32x4 ...) => (VRSQRTPS128 ...) (ReciprocalSqrtFloat32x8 ...) => (VRSQRTPS256 ...) (ReciprocalSqrtFloat32x16 ...) => (VRSQRT14PS512 ...) (ReciprocalSqrtFloat64x2 ...) => (VRSQRT14PD128 ...) (ReciprocalSqrtFloat64x4 ...) => (VRSQRT14PD256 ...) (ReciprocalSqrtFloat64x8 ...) => (VRSQRT14PD512 ...) 
-(ReciprocalSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) -(ReciprocalSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) -(ReciprocalSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) -(ReciprocalSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) -(ReciprocalSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) -(ReciprocalSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) (RotateAllLeftInt32x4 ...) => (VPROLD128 ...) (RotateAllLeftInt32x8 ...) => (VPROLD256 ...) (RotateAllLeftInt32x16 ...) => (VPROLD512 ...) @@ -1262,18 +692,6 @@ (RotateAllLeftUint64x2 ...) => (VPROLQ128 ...) (RotateAllLeftUint64x4 ...) => (VPROLQ256 ...) (RotateAllLeftUint64x8 ...) => (VPROLQ512 ...) -(RotateAllLeftMaskedInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllLeftMaskedInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllLeftMaskedInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllLeftMaskedInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllLeftMaskedInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllLeftMaskedInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(RotateAllLeftMaskedUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllLeftMaskedUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllLeftMaskedUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllLeftMaskedUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllLeftMaskedUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllLeftMaskedUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) (RotateAllRightInt32x4 ...) => (VPRORD128 ...) (RotateAllRightInt32x8 ...) => (VPRORD256 ...) (RotateAllRightInt32x16 ...) => (VPRORD512 ...) @@ -1286,18 +704,6 @@ (RotateAllRightUint64x2 ...) => (VPRORQ128 ...) (RotateAllRightUint64x4 ...) => (VPRORQ256 ...) (RotateAllRightUint64x8 ...) => (VPRORQ512 ...) -(RotateAllRightMaskedInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllRightMaskedInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllRightMaskedInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllRightMaskedInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllRightMaskedInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllRightMaskedInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(RotateAllRightMaskedUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllRightMaskedUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllRightMaskedUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllRightMaskedUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllRightMaskedUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllRightMaskedUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) (RotateLeftInt32x4 ...) => (VPROLVD128 ...) (RotateLeftInt32x8 ...) => (VPROLVD256 ...) (RotateLeftInt32x16 ...) => (VPROLVD512 ...) @@ -1310,18 +716,6 @@ (RotateLeftUint64x2 ...) 
=> (VPROLVQ128 ...) (RotateLeftUint64x4 ...) => (VPROLVQ256 ...) (RotateLeftUint64x8 ...) => (VPROLVQ512 ...) -(RotateLeftMaskedInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateLeftMaskedInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateLeftMaskedInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateLeftMaskedInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateLeftMaskedInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateLeftMaskedInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(RotateLeftMaskedUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateLeftMaskedUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateLeftMaskedUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateLeftMaskedUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateLeftMaskedUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateLeftMaskedUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) (RotateRightInt32x4 ...) => (VPRORVD128 ...) (RotateRightInt32x8 ...) => (VPRORVD256 ...) (RotateRightInt32x16 ...) => (VPRORVD512 ...) @@ -1334,18 +728,6 @@ (RotateRightUint64x2 ...) => (VPRORVQ128 ...) (RotateRightUint64x4 ...) => (VPRORVQ256 ...) (RotateRightUint64x8 ...) => (VPRORVQ512 ...) -(RotateRightMaskedInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateRightMaskedInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateRightMaskedInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateRightMaskedInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateRightMaskedInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateRightMaskedInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) -(RotateRightMaskedUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateRightMaskedUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateRightMaskedUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) (RoundToEvenFloat32x4 x) => (VROUNDPS128 [0] x) (RoundToEvenFloat32x8 x) => (VROUNDPS256 [0] x) (RoundToEvenFloat64x2 x) => (VROUNDPD128 [0] x) @@ -1356,36 +738,18 @@ (RoundToEvenScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x) (RoundToEvenScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x) (RoundToEvenScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x) -(RoundToEvenScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) -(RoundToEvenScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) -(RoundToEvenScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) -(RoundToEvenScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) -(RoundToEvenScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) -(RoundToEvenScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) (RoundToEvenScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x) 
(RoundToEvenScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
(RoundToEvenScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
(RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
(RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
(RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
-(RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask))
-(RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask))
-(RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask))
-(RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask))
-(RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask))
-(RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask))
(ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
(ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
(ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
(ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
(ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
-(ScaleMaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask))
-(ScaleMaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask))
-(ScaleMaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask))
-(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask))
-(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask))
-(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask))
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
(SetElemFloat64x2 ...) => (VPINSRQ128 ...)
(SetElemInt8x16 ...) => (VPINSRB128 ...)
@@ -1481,51 +845,6 @@
(ShiftAllLeftConcatUint64x2 ...) => (VPSHLDQ128 ...)
(ShiftAllLeftConcatUint64x4 ...) => (VPSHLDQ256 ...)
(ShiftAllLeftConcatUint64x8 ...) => (VPSHLDQ512 ...)
-(ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(VPSLLWMasked128 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) -(VPSLLWMasked256 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) -(VPSLLWMasked512 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) -(VPSLLDMasked128 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) -(VPSLLDMasked256 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) -(VPSLLDMasked512 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) -(VPSLLQMasked128 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y 
(VPMOVVec16x16ToM mask)) -(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftAllRightInt16x8 ...) => (VPSRAW128 ...) (VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x) (ShiftAllRightInt16x16 ...) => (VPSRAW256 ...) @@ -1571,51 +890,6 @@ (ShiftAllRightConcatUint64x2 ...) => (VPSHRDQ128 ...) (ShiftAllRightConcatUint64x4 ...) => (VPSHRDQ256 ...) (ShiftAllRightConcatUint64x8 ...) => (VPSHRDQ512 ...) -(ShiftAllRightConcatMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightConcatMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightConcatMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightConcatMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightConcatMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightConcatMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightConcatMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightConcatMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightConcatMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightConcatMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightConcatMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightConcatMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightConcatMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightConcatMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightConcatMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask) -(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) -(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask) -(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) -(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask) -(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) -(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask) -(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) -(VPSRADMasked256 x 
(MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask) -(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) -(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask) -(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) -(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask) -(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) -(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask) -(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) -(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask) -(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) @@ -1652,42 +926,6 @@ (ShiftLeftConcatUint64x2 ...) => (VPSHLDVQ128 ...) (ShiftLeftConcatUint64x4 ...) => (VPSHLDVQ256 ...) (ShiftLeftConcatUint64x8 ...) => (VPSHLDVQ512 ...) 
-(ShiftLeftConcatMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftLeftConcatMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftLeftConcatMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftLeftConcatMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftLeftConcatMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftLeftConcatMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftLeftConcatMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftLeftConcatMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftLeftConcatMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftLeftConcatMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftLeftConcatMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftLeftConcatMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftLeftConcatMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftLeftConcatMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftLeftConcatMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftLeftConcatMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftLeftConcatMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftLeftConcatMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftLeftMaskedInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftLeftMaskedInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftLeftMaskedInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftLeftMaskedInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftLeftMaskedInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftLeftMaskedInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftLeftMaskedInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftLeftMaskedInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftLeftMaskedInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftLeftMaskedUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftLeftMaskedUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftLeftMaskedUint16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftLeftMaskedUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftLeftMaskedUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftLeftMaskedUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftRightInt16x8 ...) => (VPSRAVW128 ...) (ShiftRightInt16x16 ...) => (VPSRAVW256 ...) (ShiftRightInt16x32 ...) => (VPSRAVW512 ...) @@ -1724,54 +962,12 @@ (ShiftRightConcatUint64x2 ...) => (VPSHRDVQ128 ...) (ShiftRightConcatUint64x4 ...) => (VPSHRDVQ256 ...) 
(ShiftRightConcatUint64x8 ...) => (VPSHRDVQ512 ...) -(ShiftRightConcatMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftRightConcatMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftRightConcatMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftRightConcatMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftRightConcatMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftRightConcatMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftRightConcatMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftRightConcatMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftRightConcatMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightConcatMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftRightConcatMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftRightConcatMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftRightConcatMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftRightConcatMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftRightConcatMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftRightConcatMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftRightConcatMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftRightConcatMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightMaskedUint32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightMaskedUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightMaskedUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) (SqrtFloat32x4 ...) => (VSQRTPS128 ...) (SqrtFloat32x8 ...) => (VSQRTPS256 ...) (SqrtFloat32x16 ...) => (VSQRTPS512 ...) (SqrtFloat64x2 ...) => (VSQRTPD128 ...) (SqrtFloat64x4 ...) => (VSQRTPD256 ...) 
(SqrtFloat64x8 ...) => (VSQRTPD512 ...) -(SqrtMaskedFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) -(SqrtMaskedFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) -(SqrtMaskedFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) -(SqrtMaskedFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) -(SqrtMaskedFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) -(SqrtMaskedFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) (SubFloat32x4 ...) => (VSUBPS128 ...) (SubFloat32x8 ...) => (VSUBPS256 ...) (SubFloat32x16 ...) => (VSUBPS512 ...) @@ -1802,36 +998,6 @@ (SubUint64x2 ...) => (VPSUBQ128 ...) (SubUint64x4 ...) => (VPSUBQ256 ...) (SubUint64x8 ...) => (VPSUBQ512 ...) -(SubMaskedFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) -(SubMaskedFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) -(SubMaskedFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) -(SubMaskedFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) -(SubMaskedFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) -(SubMaskedFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) -(SubMaskedInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) -(SubMaskedInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) -(SubMaskedInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) -(SubMaskedInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) -(SubMaskedInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) -(SubMaskedInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) -(SubMaskedInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) -(SubMaskedInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) -(SubMaskedInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) -(SubMaskedInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) -(SubMaskedInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) -(SubMaskedInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) -(SubMaskedUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) -(SubMaskedUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) -(SubMaskedUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) -(SubMaskedUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) -(SubMaskedUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) -(SubMaskedUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) -(SubMaskedUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) -(SubMaskedUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) -(SubMaskedUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) -(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) -(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) -(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) (SubPairsFloat32x4 ...) => (VHSUBPS128 ...) (SubPairsFloat32x8 ...) => (VHSUBPS256 ...) (SubPairsFloat64x2 ...) => (VHSUBPD128 ...) @@ -1858,18 +1024,6 @@ (SubSaturatedUint16x8 ...) => (VPSUBUSW128 ...) (SubSaturatedUint16x16 ...) => (VPSUBUSW256 ...) (SubSaturatedUint16x32 ...) => (VPSUBUSW512 ...) 
-(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask))
-(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask))
-(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask))
-(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask))
-(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask))
-(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask))
-(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM mask))
-(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM mask))
-(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM mask))
-(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM mask))
-(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM mask))
-(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM mask))
(TruncFloat32x4 x) => (VROUNDPS128 [3] x)
(TruncFloat32x8 x) => (VROUNDPS256 [3] x)
(TruncFloat64x2 x) => (VROUNDPD128 [3] x)
@@ -1880,24 +1034,12 @@
(TruncScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
(TruncScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
(TruncScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
-(TruncScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask))
-(TruncScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask))
-(TruncScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask))
-(TruncScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask))
-(TruncScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask))
-(TruncScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask))
(TruncScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
(TruncScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
(TruncScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
(TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
(TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
(TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
-(TruncScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask))
-(TruncScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask))
-(TruncScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask))
-(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask))
-(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask))
-(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask))
(XorInt8x16 ...) => (VPXOR128 ...)
(XorInt8x32 ...) => (VPXOR256 ...)
(XorInt8x64 ...) => (VPXORD512 ...)
@@ -1922,18 +1064,6 @@
(XorUint64x2 ...) => (VPXOR128 ...)
(XorUint64x4 ...) => (VPXOR256 ...)
(XorUint64x8 ...) => (VPXORQ512 ...)
-(XorMaskedInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) -(XorMaskedInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) -(XorMaskedInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) -(XorMaskedInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) -(XorMaskedInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) -(XorMaskedInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) -(XorMaskedUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) -(XorMaskedUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) -(XorMaskedUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) -(XorMaskedUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) -(XorMaskedUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) -(XorMaskedUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) (blendInt8x16 ...) => (VPBLENDVB128 ...) (blendInt8x32 ...) => (VPBLENDVB256 ...) (blendMaskedInt8x64 x y mask) => (VPBLENDMBMasked512 x y (VPMOVVec8x64ToM mask)) diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index d98c0d8152a..08bfe369511 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -16,36 +16,15 @@ func simdGenericOps() []opData { {name: "AbsInt64x2", argLength: 1, commutative: false}, {name: "AbsInt64x4", argLength: 1, commutative: false}, {name: "AbsInt64x8", argLength: 1, commutative: false}, - {name: "AbsMaskedInt8x16", argLength: 2, commutative: false}, - {name: "AbsMaskedInt8x32", argLength: 2, commutative: false}, - {name: "AbsMaskedInt8x64", argLength: 2, commutative: false}, - {name: "AbsMaskedInt16x8", argLength: 2, commutative: false}, - {name: "AbsMaskedInt16x16", argLength: 2, commutative: false}, - {name: "AbsMaskedInt16x32", argLength: 2, commutative: false}, - {name: "AbsMaskedInt32x4", argLength: 2, commutative: false}, - {name: "AbsMaskedInt32x8", argLength: 2, commutative: false}, - {name: "AbsMaskedInt32x16", argLength: 2, commutative: false}, - {name: "AbsMaskedInt64x2", argLength: 2, commutative: false}, - {name: "AbsMaskedInt64x4", argLength: 2, commutative: false}, - {name: "AbsMaskedInt64x8", argLength: 2, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false}, - {name: "AddDotProdPairsSaturatedMaskedInt32x4", argLength: 4, commutative: false}, - {name: "AddDotProdPairsSaturatedMaskedInt32x8", argLength: 4, commutative: false}, - {name: "AddDotProdPairsSaturatedMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false}, - {name: "AddDotProdQuadrupleMaskedInt32x4", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleMaskedInt32x8", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false}, - 
{name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddFloat32x4", argLength: 2, commutative: true}, {name: "AddFloat32x8", argLength: 2, commutative: true}, {name: "AddFloat32x16", argLength: 2, commutative: true}, @@ -64,36 +43,6 @@ func simdGenericOps() []opData { {name: "AddInt64x2", argLength: 2, commutative: true}, {name: "AddInt64x4", argLength: 2, commutative: true}, {name: "AddInt64x8", argLength: 2, commutative: true}, - {name: "AddMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "AddMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "AddMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "AddMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "AddMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "AddMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "AddMaskedInt8x16", argLength: 3, commutative: true}, - {name: "AddMaskedInt8x32", argLength: 3, commutative: true}, - {name: "AddMaskedInt8x64", argLength: 3, commutative: true}, - {name: "AddMaskedInt16x8", argLength: 3, commutative: true}, - {name: "AddMaskedInt16x16", argLength: 3, commutative: true}, - {name: "AddMaskedInt16x32", argLength: 3, commutative: true}, - {name: "AddMaskedInt32x4", argLength: 3, commutative: true}, - {name: "AddMaskedInt32x8", argLength: 3, commutative: true}, - {name: "AddMaskedInt32x16", argLength: 3, commutative: true}, - {name: "AddMaskedInt64x2", argLength: 3, commutative: true}, - {name: "AddMaskedInt64x4", argLength: 3, commutative: true}, - {name: "AddMaskedInt64x8", argLength: 3, commutative: true}, - {name: "AddMaskedUint8x16", argLength: 3, commutative: true}, - {name: "AddMaskedUint8x32", argLength: 3, commutative: true}, - {name: "AddMaskedUint8x64", argLength: 3, commutative: true}, - {name: "AddMaskedUint16x8", argLength: 3, commutative: true}, - {name: "AddMaskedUint16x16", argLength: 3, commutative: true}, - {name: "AddMaskedUint16x32", argLength: 3, commutative: true}, - {name: "AddMaskedUint32x4", argLength: 3, commutative: true}, - {name: "AddMaskedUint32x8", argLength: 3, commutative: true}, - {name: "AddMaskedUint32x16", argLength: 3, commutative: true}, - {name: "AddMaskedUint64x2", argLength: 3, commutative: true}, - {name: "AddMaskedUint64x4", argLength: 3, commutative: true}, - {name: "AddMaskedUint64x8", argLength: 3, commutative: true}, {name: "AddPairsFloat32x4", argLength: 2, commutative: false}, {name: "AddPairsFloat32x8", argLength: 2, commutative: false}, {name: "AddPairsFloat64x2", argLength: 2, commutative: false}, @@ -114,18 +63,6 @@ func simdGenericOps() []opData { {name: "AddSaturatedInt16x8", argLength: 2, commutative: true}, {name: "AddSaturatedInt16x16", argLength: 2, commutative: true}, {name: "AddSaturatedInt16x32", argLength: 2, commutative: true}, - {name: "AddSaturatedMaskedInt8x16", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt8x32", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt8x64", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt16x8", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt16x16", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt16x32", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint8x16", argLength: 3, commutative: true}, - {name: 
"AddSaturatedMaskedUint8x32", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint8x64", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint16x8", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint16x16", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint16x32", argLength: 3, commutative: true}, {name: "AddSaturatedUint8x16", argLength: 2, commutative: true}, {name: "AddSaturatedUint8x32", argLength: 2, commutative: true}, {name: "AddSaturatedUint8x64", argLength: 2, commutative: true}, @@ -160,18 +97,6 @@ func simdGenericOps() []opData { {name: "AndInt64x2", argLength: 2, commutative: true}, {name: "AndInt64x4", argLength: 2, commutative: true}, {name: "AndInt64x8", argLength: 2, commutative: true}, - {name: "AndMaskedInt32x4", argLength: 3, commutative: true}, - {name: "AndMaskedInt32x8", argLength: 3, commutative: true}, - {name: "AndMaskedInt32x16", argLength: 3, commutative: true}, - {name: "AndMaskedInt64x2", argLength: 3, commutative: true}, - {name: "AndMaskedInt64x4", argLength: 3, commutative: true}, - {name: "AndMaskedInt64x8", argLength: 3, commutative: true}, - {name: "AndMaskedUint32x4", argLength: 3, commutative: true}, - {name: "AndMaskedUint32x8", argLength: 3, commutative: true}, - {name: "AndMaskedUint32x16", argLength: 3, commutative: true}, - {name: "AndMaskedUint64x2", argLength: 3, commutative: true}, - {name: "AndMaskedUint64x4", argLength: 3, commutative: true}, - {name: "AndMaskedUint64x8", argLength: 3, commutative: true}, {name: "AndNotInt8x16", argLength: 2, commutative: false}, {name: "AndNotInt8x32", argLength: 2, commutative: false}, {name: "AndNotInt8x64", argLength: 2, commutative: false}, @@ -184,18 +109,6 @@ func simdGenericOps() []opData { {name: "AndNotInt64x2", argLength: 2, commutative: false}, {name: "AndNotInt64x4", argLength: 2, commutative: false}, {name: "AndNotInt64x8", argLength: 2, commutative: false}, - {name: "AndNotMaskedInt32x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt32x8", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt32x16", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt64x2", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt64x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt64x8", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint32x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint32x8", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint32x16", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint64x2", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint64x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint64x8", argLength: 3, commutative: false}, {name: "AndNotUint8x16", argLength: 2, commutative: false}, {name: "AndNotUint8x32", argLength: 2, commutative: false}, {name: "AndNotUint8x64", argLength: 2, commutative: false}, @@ -220,12 +133,6 @@ func simdGenericOps() []opData { {name: "AndUint64x2", argLength: 2, commutative: true}, {name: "AndUint64x4", argLength: 2, commutative: true}, {name: "AndUint64x8", argLength: 2, commutative: true}, - {name: "AverageMaskedUint8x16", argLength: 3, commutative: true}, - {name: "AverageMaskedUint8x32", argLength: 3, commutative: true}, - {name: "AverageMaskedUint8x64", argLength: 3, commutative: true}, - {name: "AverageMaskedUint16x8", argLength: 3, commutative: true}, - {name: "AverageMaskedUint16x16", argLength: 3, commutative: true}, - {name: "AverageMaskedUint16x32", argLength: 3, 
commutative: true}, {name: "AverageUint8x16", argLength: 2, commutative: true}, {name: "AverageUint8x32", argLength: 2, commutative: true}, {name: "AverageUint8x64", argLength: 2, commutative: true}, @@ -238,16 +145,6 @@ func simdGenericOps() []opData { {name: "Broadcast128Int16x8", argLength: 1, commutative: false}, {name: "Broadcast128Int32x4", argLength: 1, commutative: false}, {name: "Broadcast128Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false}, {name: "Broadcast128Uint8x16", argLength: 1, commutative: false}, {name: "Broadcast128Uint16x8", argLength: 1, commutative: false}, {name: "Broadcast128Uint32x4", argLength: 1, commutative: false}, @@ -258,16 +155,6 @@ func simdGenericOps() []opData { {name: "Broadcast256Int16x8", argLength: 1, commutative: false}, {name: "Broadcast256Int32x4", argLength: 1, commutative: false}, {name: "Broadcast256Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false}, {name: "Broadcast256Uint8x16", argLength: 1, commutative: false}, {name: "Broadcast256Uint16x8", argLength: 1, commutative: false}, {name: "Broadcast256Uint32x4", argLength: 1, commutative: false}, @@ -278,16 +165,6 @@ func simdGenericOps() []opData { {name: "Broadcast512Int16x8", argLength: 1, commutative: false}, {name: "Broadcast512Int32x4", argLength: 1, commutative: false}, {name: "Broadcast512Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: 
false}, {name: "Broadcast512Uint8x16", argLength: 1, commutative: false}, {name: "Broadcast512Uint16x8", argLength: 1, commutative: false}, {name: "Broadcast512Uint32x4", argLength: 1, commutative: false}, @@ -329,15 +206,9 @@ func simdGenericOps() []opData { {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt32MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ConvertToInt32MaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ConvertToInt32MaskedFloat32x16", argLength: 2, commutative: false}, {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false}, - {name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false}, {name: "CopySignInt8x16", argLength: 2, commutative: false}, {name: "CopySignInt8x32", argLength: 2, commutative: false}, {name: "CopySignInt16x8", argLength: 2, commutative: false}, @@ -350,21 +221,9 @@ func simdGenericOps() []opData { {name: "DivFloat64x2", argLength: 2, commutative: false}, {name: "DivFloat64x4", argLength: 2, commutative: false}, {name: "DivFloat64x8", argLength: 2, commutative: false}, - {name: "DivMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "DivMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "DivMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "DivMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "DivMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "DivMaskedFloat64x8", argLength: 3, commutative: false}, {name: "DotProdPairsInt16x8", argLength: 2, commutative: false}, {name: "DotProdPairsInt16x16", argLength: 2, commutative: false}, {name: "DotProdPairsInt16x32", argLength: 2, commutative: false}, - {name: "DotProdPairsMaskedInt16x8", argLength: 3, commutative: false}, - {name: "DotProdPairsMaskedInt16x16", argLength: 3, commutative: false}, - {name: "DotProdPairsMaskedInt16x32", argLength: 3, commutative: false}, - {name: "DotProdPairsSaturatedMaskedUint8x16", argLength: 3, commutative: false}, - {name: "DotProdPairsSaturatedMaskedUint8x32", argLength: 3, commutative: false}, - {name: "DotProdPairsSaturatedMaskedUint8x64", argLength: 3, commutative: false}, {name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false}, {name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false}, {name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false}, @@ -386,36 +245,6 @@ func simdGenericOps() []opData { {name: "EqualInt64x2", argLength: 2, commutative: true}, {name: "EqualInt64x4", argLength: 2, commutative: true}, {name: "EqualInt64x8", argLength: 2, commutative: true}, - {name: "EqualMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "EqualMaskedInt8x16", argLength: 3, commutative: true}, - {name: 
"EqualMaskedInt8x32", argLength: 3, commutative: true}, - {name: "EqualMaskedInt8x64", argLength: 3, commutative: true}, - {name: "EqualMaskedInt16x8", argLength: 3, commutative: true}, - {name: "EqualMaskedInt16x16", argLength: 3, commutative: true}, - {name: "EqualMaskedInt16x32", argLength: 3, commutative: true}, - {name: "EqualMaskedInt32x4", argLength: 3, commutative: true}, - {name: "EqualMaskedInt32x8", argLength: 3, commutative: true}, - {name: "EqualMaskedInt32x16", argLength: 3, commutative: true}, - {name: "EqualMaskedInt64x2", argLength: 3, commutative: true}, - {name: "EqualMaskedInt64x4", argLength: 3, commutative: true}, - {name: "EqualMaskedInt64x8", argLength: 3, commutative: true}, - {name: "EqualMaskedUint8x16", argLength: 3, commutative: true}, - {name: "EqualMaskedUint8x32", argLength: 3, commutative: true}, - {name: "EqualMaskedUint8x64", argLength: 3, commutative: true}, - {name: "EqualMaskedUint16x8", argLength: 3, commutative: true}, - {name: "EqualMaskedUint16x16", argLength: 3, commutative: true}, - {name: "EqualMaskedUint16x32", argLength: 3, commutative: true}, - {name: "EqualMaskedUint32x4", argLength: 3, commutative: true}, - {name: "EqualMaskedUint32x8", argLength: 3, commutative: true}, - {name: "EqualMaskedUint32x16", argLength: 3, commutative: true}, - {name: "EqualMaskedUint64x2", argLength: 3, commutative: true}, - {name: "EqualMaskedUint64x4", argLength: 3, commutative: true}, - {name: "EqualMaskedUint64x8", argLength: 3, commutative: true}, {name: "EqualUint8x16", argLength: 2, commutative: true}, {name: "EqualUint8x32", argLength: 2, commutative: true}, {name: "EqualUint8x64", argLength: 2, commutative: true}, @@ -462,9 +291,6 @@ func simdGenericOps() []opData { {name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "FloorFloat64x4", argLength: 1, commutative: false}, - {name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false}, - {name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false}, - {name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false}, {name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false}, {name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false}, {name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false}, @@ -518,36 +344,6 @@ func simdGenericOps() []opData { {name: "GreaterEqualInt16x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x8", argLength: 2, commutative: false}, - {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt8x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt8x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt8x64", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt16x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt16x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt16x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt32x4", argLength: 3, commutative: 
false}, - {name: "GreaterEqualMaskedInt32x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt32x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt64x2", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt64x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt64x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint8x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint8x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint8x64", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint16x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint16x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint16x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint32x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint32x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint32x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false}, {name: "GreaterEqualUint8x64", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x32", argLength: 2, commutative: false}, {name: "GreaterEqualUint32x16", argLength: 2, commutative: false}, @@ -570,36 +366,6 @@ func simdGenericOps() []opData { {name: "GreaterInt64x2", argLength: 2, commutative: false}, {name: "GreaterInt64x4", argLength: 2, commutative: false}, {name: "GreaterInt64x8", argLength: 2, commutative: false}, - {name: "GreaterMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt8x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt8x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt8x64", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt16x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt16x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt16x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt32x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt32x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt32x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt64x2", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt64x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt64x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint8x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint8x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint8x64", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint16x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint16x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint16x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint32x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint32x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint32x16", argLength: 3, commutative: 
false}, - {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false}, {name: "GreaterUint8x64", argLength: 2, commutative: false}, {name: "GreaterUint16x32", argLength: 2, commutative: false}, {name: "GreaterUint32x16", argLength: 2, commutative: false}, @@ -610,12 +376,6 @@ func simdGenericOps() []opData { {name: "IsNanFloat64x2", argLength: 2, commutative: true}, {name: "IsNanFloat64x4", argLength: 2, commutative: true}, {name: "IsNanFloat64x8", argLength: 2, commutative: true}, - {name: "IsNanMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat64x8", argLength: 3, commutative: true}, {name: "LessEqualFloat32x4", argLength: 2, commutative: false}, {name: "LessEqualFloat32x8", argLength: 2, commutative: false}, {name: "LessEqualFloat32x16", argLength: 2, commutative: false}, @@ -626,36 +386,6 @@ func simdGenericOps() []opData { {name: "LessEqualInt16x32", argLength: 2, commutative: false}, {name: "LessEqualInt32x16", argLength: 2, commutative: false}, {name: "LessEqualInt64x8", argLength: 2, commutative: false}, - {name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt8x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt8x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt8x64", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt16x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt16x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt16x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt32x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt32x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt32x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt64x2", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt64x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt64x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint8x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint8x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint8x64", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint16x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint16x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint16x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint32x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint32x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint32x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false}, - {name: 
"LessEqualMaskedUint64x8", argLength: 3, commutative: false}, {name: "LessEqualUint8x64", argLength: 2, commutative: false}, {name: "LessEqualUint16x32", argLength: 2, commutative: false}, {name: "LessEqualUint32x16", argLength: 2, commutative: false}, @@ -670,36 +400,6 @@ func simdGenericOps() []opData { {name: "LessInt16x32", argLength: 2, commutative: false}, {name: "LessInt32x16", argLength: 2, commutative: false}, {name: "LessInt64x8", argLength: 2, commutative: false}, - {name: "LessMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "LessMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "LessMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "LessMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "LessMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "LessMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "LessMaskedInt8x16", argLength: 3, commutative: false}, - {name: "LessMaskedInt8x32", argLength: 3, commutative: false}, - {name: "LessMaskedInt8x64", argLength: 3, commutative: false}, - {name: "LessMaskedInt16x8", argLength: 3, commutative: false}, - {name: "LessMaskedInt16x16", argLength: 3, commutative: false}, - {name: "LessMaskedInt16x32", argLength: 3, commutative: false}, - {name: "LessMaskedInt32x4", argLength: 3, commutative: false}, - {name: "LessMaskedInt32x8", argLength: 3, commutative: false}, - {name: "LessMaskedInt32x16", argLength: 3, commutative: false}, - {name: "LessMaskedInt64x2", argLength: 3, commutative: false}, - {name: "LessMaskedInt64x4", argLength: 3, commutative: false}, - {name: "LessMaskedInt64x8", argLength: 3, commutative: false}, - {name: "LessMaskedUint8x16", argLength: 3, commutative: false}, - {name: "LessMaskedUint8x32", argLength: 3, commutative: false}, - {name: "LessMaskedUint8x64", argLength: 3, commutative: false}, - {name: "LessMaskedUint16x8", argLength: 3, commutative: false}, - {name: "LessMaskedUint16x16", argLength: 3, commutative: false}, - {name: "LessMaskedUint16x32", argLength: 3, commutative: false}, - {name: "LessMaskedUint32x4", argLength: 3, commutative: false}, - {name: "LessMaskedUint32x8", argLength: 3, commutative: false}, - {name: "LessMaskedUint32x16", argLength: 3, commutative: false}, - {name: "LessMaskedUint64x2", argLength: 3, commutative: false}, - {name: "LessMaskedUint64x4", argLength: 3, commutative: false}, - {name: "LessMaskedUint64x8", argLength: 3, commutative: false}, {name: "LessUint8x64", argLength: 2, commutative: false}, {name: "LessUint16x32", argLength: 2, commutative: false}, {name: "LessUint32x16", argLength: 2, commutative: false}, @@ -722,36 +422,6 @@ func simdGenericOps() []opData { {name: "MaxInt64x2", argLength: 2, commutative: true}, {name: "MaxInt64x4", argLength: 2, commutative: true}, {name: "MaxInt64x8", argLength: 2, commutative: true}, - {name: "MaxMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "MaxMaskedInt8x16", argLength: 3, commutative: true}, - {name: "MaxMaskedInt8x32", argLength: 3, commutative: true}, - {name: "MaxMaskedInt8x64", argLength: 3, commutative: true}, - {name: "MaxMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MaxMaskedInt16x16", argLength: 3, 
commutative: true}, - {name: "MaxMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MaxMaskedInt32x4", argLength: 3, commutative: true}, - {name: "MaxMaskedInt32x8", argLength: 3, commutative: true}, - {name: "MaxMaskedInt32x16", argLength: 3, commutative: true}, - {name: "MaxMaskedInt64x2", argLength: 3, commutative: true}, - {name: "MaxMaskedInt64x4", argLength: 3, commutative: true}, - {name: "MaxMaskedInt64x8", argLength: 3, commutative: true}, - {name: "MaxMaskedUint8x16", argLength: 3, commutative: true}, - {name: "MaxMaskedUint8x32", argLength: 3, commutative: true}, - {name: "MaxMaskedUint8x64", argLength: 3, commutative: true}, - {name: "MaxMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MaxMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MaxMaskedUint16x32", argLength: 3, commutative: true}, - {name: "MaxMaskedUint32x4", argLength: 3, commutative: true}, - {name: "MaxMaskedUint32x8", argLength: 3, commutative: true}, - {name: "MaxMaskedUint32x16", argLength: 3, commutative: true}, - {name: "MaxMaskedUint64x2", argLength: 3, commutative: true}, - {name: "MaxMaskedUint64x4", argLength: 3, commutative: true}, - {name: "MaxMaskedUint64x8", argLength: 3, commutative: true}, {name: "MaxUint8x16", argLength: 2, commutative: true}, {name: "MaxUint8x32", argLength: 2, commutative: true}, {name: "MaxUint8x64", argLength: 2, commutative: true}, @@ -782,36 +452,6 @@ func simdGenericOps() []opData { {name: "MinInt64x2", argLength: 2, commutative: true}, {name: "MinInt64x4", argLength: 2, commutative: true}, {name: "MinInt64x8", argLength: 2, commutative: true}, - {name: "MinMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "MinMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "MinMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "MinMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "MinMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "MinMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "MinMaskedInt8x16", argLength: 3, commutative: true}, - {name: "MinMaskedInt8x32", argLength: 3, commutative: true}, - {name: "MinMaskedInt8x64", argLength: 3, commutative: true}, - {name: "MinMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MinMaskedInt16x16", argLength: 3, commutative: true}, - {name: "MinMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MinMaskedInt32x4", argLength: 3, commutative: true}, - {name: "MinMaskedInt32x8", argLength: 3, commutative: true}, - {name: "MinMaskedInt32x16", argLength: 3, commutative: true}, - {name: "MinMaskedInt64x2", argLength: 3, commutative: true}, - {name: "MinMaskedInt64x4", argLength: 3, commutative: true}, - {name: "MinMaskedInt64x8", argLength: 3, commutative: true}, - {name: "MinMaskedUint8x16", argLength: 3, commutative: true}, - {name: "MinMaskedUint8x32", argLength: 3, commutative: true}, - {name: "MinMaskedUint8x64", argLength: 3, commutative: true}, - {name: "MinMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MinMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MinMaskedUint16x32", argLength: 3, commutative: true}, - {name: "MinMaskedUint32x4", argLength: 3, commutative: true}, - {name: "MinMaskedUint32x8", argLength: 3, commutative: true}, - {name: "MinMaskedUint32x16", argLength: 3, commutative: true}, - {name: "MinMaskedUint64x2", argLength: 3, commutative: true}, - {name: "MinMaskedUint64x4", argLength: 3, commutative: true}, - {name: "MinMaskedUint64x8", argLength: 3, commutative: true}, {name: 
"MinUint8x16", argLength: 2, commutative: true}, {name: "MinUint8x32", argLength: 2, commutative: true}, {name: "MinUint8x64", argLength: 2, commutative: true}, @@ -830,24 +470,12 @@ func simdGenericOps() []opData { {name: "MulAddFloat64x2", argLength: 3, commutative: false}, {name: "MulAddFloat64x4", argLength: 3, commutative: false}, {name: "MulAddFloat64x8", argLength: 3, commutative: false}, - {name: "MulAddMaskedFloat32x4", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat32x8", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat32x16", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat64x2", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat64x4", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat64x8", argLength: 4, commutative: false}, {name: "MulAddSubFloat32x4", argLength: 3, commutative: false}, {name: "MulAddSubFloat32x8", argLength: 3, commutative: false}, {name: "MulAddSubFloat32x16", argLength: 3, commutative: false}, {name: "MulAddSubFloat64x2", argLength: 3, commutative: false}, {name: "MulAddSubFloat64x4", argLength: 3, commutative: false}, {name: "MulAddSubFloat64x8", argLength: 3, commutative: false}, - {name: "MulAddSubMaskedFloat32x4", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat32x8", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat32x16", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat64x2", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat64x4", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat64x8", argLength: 4, commutative: false}, {name: "MulEvenWidenInt32x4", argLength: 2, commutative: true}, {name: "MulEvenWidenInt32x8", argLength: 2, commutative: true}, {name: "MulEvenWidenUint32x4", argLength: 2, commutative: true}, @@ -861,12 +489,6 @@ func simdGenericOps() []opData { {name: "MulHighInt16x8", argLength: 2, commutative: true}, {name: "MulHighInt16x16", argLength: 2, commutative: true}, {name: "MulHighInt16x32", argLength: 2, commutative: true}, - {name: "MulHighMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MulHighMaskedInt16x16", argLength: 3, commutative: true}, - {name: "MulHighMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MulHighMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MulHighMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true}, {name: "MulHighUint16x8", argLength: 2, commutative: true}, {name: "MulHighUint16x16", argLength: 2, commutative: true}, {name: "MulHighUint16x32", argLength: 2, commutative: true}, @@ -879,42 +501,12 @@ func simdGenericOps() []opData { {name: "MulInt64x2", argLength: 2, commutative: true}, {name: "MulInt64x4", argLength: 2, commutative: true}, {name: "MulInt64x8", argLength: 2, commutative: true}, - {name: "MulMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "MulMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "MulMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "MulMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "MulMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "MulMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "MulMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MulMaskedInt16x16", argLength: 3, commutative: true}, - {name: "MulMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MulMaskedInt32x4", argLength: 3, commutative: true}, - {name: "MulMaskedInt32x8", argLength: 
3, commutative: true}, - {name: "MulMaskedInt32x16", argLength: 3, commutative: true}, - {name: "MulMaskedInt64x2", argLength: 3, commutative: true}, - {name: "MulMaskedInt64x4", argLength: 3, commutative: true}, - {name: "MulMaskedInt64x8", argLength: 3, commutative: true}, - {name: "MulMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MulMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MulMaskedUint16x32", argLength: 3, commutative: true}, - {name: "MulMaskedUint32x4", argLength: 3, commutative: true}, - {name: "MulMaskedUint32x8", argLength: 3, commutative: true}, - {name: "MulMaskedUint32x16", argLength: 3, commutative: true}, - {name: "MulMaskedUint64x2", argLength: 3, commutative: true}, - {name: "MulMaskedUint64x4", argLength: 3, commutative: true}, - {name: "MulMaskedUint64x8", argLength: 3, commutative: true}, {name: "MulSubAddFloat32x4", argLength: 3, commutative: false}, {name: "MulSubAddFloat32x8", argLength: 3, commutative: false}, {name: "MulSubAddFloat32x16", argLength: 3, commutative: false}, {name: "MulSubAddFloat64x2", argLength: 3, commutative: false}, {name: "MulSubAddFloat64x4", argLength: 3, commutative: false}, {name: "MulSubAddFloat64x8", argLength: 3, commutative: false}, - {name: "MulSubAddMaskedFloat32x4", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat32x8", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat32x16", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat64x2", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat64x4", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat64x8", argLength: 4, commutative: false}, {name: "MulUint16x8", argLength: 2, commutative: true}, {name: "MulUint16x16", argLength: 2, commutative: true}, {name: "MulUint16x32", argLength: 2, commutative: true}, @@ -934,36 +526,6 @@ func simdGenericOps() []opData { {name: "NotEqualInt16x32", argLength: 2, commutative: true}, {name: "NotEqualInt32x16", argLength: 2, commutative: true}, {name: "NotEqualInt64x8", argLength: 2, commutative: true}, - {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt8x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt8x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt8x64", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt16x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt16x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt16x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt32x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt32x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt32x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt64x2", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt64x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt64x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint8x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint8x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true}, - {name: 
"NotEqualMaskedUint16x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint16x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint32x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint32x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true}, {name: "NotEqualUint8x64", argLength: 2, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true}, {name: "NotEqualUint32x16", argLength: 2, commutative: true}, @@ -980,30 +542,6 @@ func simdGenericOps() []opData { {name: "OnesCountInt64x2", argLength: 1, commutative: false}, {name: "OnesCountInt64x4", argLength: 1, commutative: false}, {name: "OnesCountInt64x8", argLength: 1, commutative: false}, - {name: "OnesCountMaskedInt8x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt8x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt8x64", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt16x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt16x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt16x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt32x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt32x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt32x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt64x2", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt64x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt64x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint8x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint8x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint8x64", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint16x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint16x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint16x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint32x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint32x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint32x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint64x2", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint64x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint64x8", argLength: 2, commutative: false}, {name: "OnesCountUint8x16", argLength: 1, commutative: false}, {name: "OnesCountUint8x32", argLength: 1, commutative: false}, {name: "OnesCountUint8x64", argLength: 1, commutative: false}, @@ -1028,18 +566,6 @@ func simdGenericOps() []opData { {name: "OrInt64x2", argLength: 2, commutative: true}, {name: "OrInt64x4", argLength: 2, commutative: true}, {name: "OrInt64x8", argLength: 2, commutative: true}, - {name: "OrMaskedInt32x4", argLength: 3, commutative: true}, - {name: "OrMaskedInt32x8", argLength: 3, commutative: true}, - {name: "OrMaskedInt32x16", argLength: 3, commutative: true}, - {name: "OrMaskedInt64x2", argLength: 3, commutative: true}, - {name: "OrMaskedInt64x4", argLength: 3, commutative: true}, - {name: "OrMaskedInt64x8", argLength: 3, commutative: true}, - {name: "OrMaskedUint32x4", 
argLength: 3, commutative: true}, - {name: "OrMaskedUint32x8", argLength: 3, commutative: true}, - {name: "OrMaskedUint32x16", argLength: 3, commutative: true}, - {name: "OrMaskedUint64x2", argLength: 3, commutative: true}, - {name: "OrMaskedUint64x4", argLength: 3, commutative: true}, - {name: "OrMaskedUint64x8", argLength: 3, commutative: true}, {name: "OrUint8x16", argLength: 2, commutative: true}, {name: "OrUint8x32", argLength: 2, commutative: true}, {name: "OrUint8x64", argLength: 2, commutative: true}, @@ -1070,36 +596,6 @@ func simdGenericOps() []opData { {name: "Permute2Int64x2", argLength: 3, commutative: false}, {name: "Permute2Int64x4", argLength: 3, commutative: false}, {name: "Permute2Int64x8", argLength: 3, commutative: false}, - {name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt8x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt8x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt8x64", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt16x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt16x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt32x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt32x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt64x2", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt64x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt64x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint8x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint8x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint8x64", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint16x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint16x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint32x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint32x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint64x2", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint64x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint64x8", argLength: 4, commutative: false}, {name: "Permute2Uint8x16", argLength: 3, commutative: false}, {name: "Permute2Uint8x32", argLength: 3, commutative: false}, {name: "Permute2Uint8x64", argLength: 3, commutative: false}, @@ -1126,30 +622,6 @@ func simdGenericOps() []opData { {name: "PermuteInt32x16", argLength: 2, commutative: false}, {name: "PermuteInt64x4", argLength: 2, commutative: false}, {name: "PermuteInt64x8", argLength: 2, commutative: false}, - {name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: 
false}, - {name: "PermuteMaskedInt8x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt8x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt8x64", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt16x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt16x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt32x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt64x4", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt64x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint8x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint8x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint8x64", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint32x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false}, {name: "PermuteUint8x16", argLength: 2, commutative: false}, {name: "PermuteUint8x32", argLength: 2, commutative: false}, {name: "PermuteUint8x64", argLength: 2, commutative: false}, @@ -1166,42 +638,18 @@ func simdGenericOps() []opData { {name: "ReciprocalFloat64x2", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x4", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x8", argLength: 1, commutative: false}, - {name: "ReciprocalMaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat32x16", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat64x2", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat64x4", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat64x8", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat32x4", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat32x8", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat32x16", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat64x2", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat64x4", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat64x8", argLength: 1, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat32x16", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat64x2", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat64x4", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat64x8", argLength: 2, commutative: false}, {name: "RotateLeftInt32x4", argLength: 2, commutative: false}, {name: "RotateLeftInt32x8", argLength: 2, commutative: false}, {name: "RotateLeftInt32x16", argLength: 2, commutative: false}, {name: "RotateLeftInt64x2", argLength: 2, commutative: false}, {name: "RotateLeftInt64x4", argLength: 2, commutative: false}, {name: "RotateLeftInt64x8", argLength: 2, commutative: false}, - {name: "RotateLeftMaskedInt32x4", 
argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt32x8", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt32x16", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt64x2", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt64x4", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt64x8", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint64x2", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint64x4", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint64x8", argLength: 3, commutative: false}, {name: "RotateLeftUint32x4", argLength: 2, commutative: false}, {name: "RotateLeftUint32x8", argLength: 2, commutative: false}, {name: "RotateLeftUint32x16", argLength: 2, commutative: false}, @@ -1214,18 +662,6 @@ func simdGenericOps() []opData { {name: "RotateRightInt64x2", argLength: 2, commutative: false}, {name: "RotateRightInt64x4", argLength: 2, commutative: false}, {name: "RotateRightInt64x8", argLength: 2, commutative: false}, - {name: "RotateRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint64x8", argLength: 3, commutative: false}, {name: "RotateRightUint32x4", argLength: 2, commutative: false}, {name: "RotateRightUint32x8", argLength: 2, commutative: false}, {name: "RotateRightUint32x16", argLength: 2, commutative: false}, @@ -1242,12 +678,6 @@ func simdGenericOps() []opData { {name: "ScaleFloat64x2", argLength: 2, commutative: false}, {name: "ScaleFloat64x4", argLength: 2, commutative: false}, {name: "ScaleFloat64x8", argLength: 2, commutative: false}, - {name: "ScaleMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false}, {name: "SetHiFloat32x8", argLength: 2, commutative: false}, {name: "SetHiFloat32x16", argLength: 2, commutative: false}, {name: "SetHiFloat64x4", argLength: 2, commutative: false}, @@ -1297,24 +727,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false}, - {name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false}, 
- {name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false}, @@ -1333,24 +745,6 @@ func simdGenericOps() []opData { {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false}, {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, {name: "ShiftAllRightUint16x32", argLength: 2, commutative: false}, @@ -1369,24 +763,6 @@ func simdGenericOps() []opData { {name: "ShiftLeftConcatInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftConcatInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftConcatInt64x8", argLength: 3, commutative: false}, - {name: "ShiftLeftConcatMaskedInt16x8", argLength: 4, commutative: false}, - {name: 
"ShiftLeftConcatMaskedInt16x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt16x32", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt32x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt32x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt32x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt64x2", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt64x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt64x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint16x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint16x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint16x32", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint32x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint32x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint32x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint64x2", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint64x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint64x8", argLength: 4, commutative: false}, {name: "ShiftLeftConcatUint16x8", argLength: 3, commutative: false}, {name: "ShiftLeftConcatUint16x16", argLength: 3, commutative: false}, {name: "ShiftLeftConcatUint16x32", argLength: 3, commutative: false}, @@ -1405,24 +781,6 @@ func simdGenericOps() []opData { {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, - {name: "ShiftLeftMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x8", argLength: 2, commutative: false}, {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftLeftUint16x32", argLength: 2, commutative: false}, @@ -1441,24 +799,6 @@ func simdGenericOps() []opData { {name: "ShiftRightConcatInt64x2", argLength: 3, commutative: false}, {name: "ShiftRightConcatInt64x4", argLength: 3, commutative: false}, {name: "ShiftRightConcatInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightConcatMaskedInt16x8", argLength: 4, commutative: false}, - 
{name: "ShiftRightConcatMaskedInt16x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt16x32", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt32x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt32x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt32x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt64x2", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt64x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt64x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint16x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint16x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint16x32", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint32x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint32x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint32x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint64x2", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint64x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint64x8", argLength: 4, commutative: false}, {name: "ShiftRightConcatUint16x8", argLength: 3, commutative: false}, {name: "ShiftRightConcatUint16x16", argLength: 3, commutative: false}, {name: "ShiftRightConcatUint16x32", argLength: 3, commutative: false}, @@ -1477,24 +817,6 @@ func simdGenericOps() []opData { {name: "ShiftRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftRightInt64x8", argLength: 2, commutative: false}, - {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftRightUint16x8", argLength: 2, commutative: false}, {name: "ShiftRightUint16x16", argLength: 2, commutative: false}, {name: "ShiftRightUint16x32", argLength: 2, commutative: false}, @@ -1510,12 +832,6 @@ func simdGenericOps() []opData { {name: "SqrtFloat64x2", argLength: 1, commutative: false}, {name: "SqrtFloat64x4", argLength: 1, commutative: false}, {name: "SqrtFloat64x8", argLength: 1, commutative: false}, - {name: "SqrtMaskedFloat32x4", argLength: 2, 
commutative: false}, - {name: "SqrtMaskedFloat32x8", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat32x16", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat64x2", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat64x4", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat64x8", argLength: 2, commutative: false}, {name: "SubFloat32x4", argLength: 2, commutative: false}, {name: "SubFloat32x8", argLength: 2, commutative: false}, {name: "SubFloat32x16", argLength: 2, commutative: false}, @@ -1534,36 +850,6 @@ func simdGenericOps() []opData { {name: "SubInt64x2", argLength: 2, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, - {name: "SubMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "SubMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "SubMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "SubMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "SubMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "SubMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "SubMaskedInt8x16", argLength: 3, commutative: false}, - {name: "SubMaskedInt8x32", argLength: 3, commutative: false}, - {name: "SubMaskedInt8x64", argLength: 3, commutative: false}, - {name: "SubMaskedInt16x8", argLength: 3, commutative: false}, - {name: "SubMaskedInt16x16", argLength: 3, commutative: false}, - {name: "SubMaskedInt16x32", argLength: 3, commutative: false}, - {name: "SubMaskedInt32x4", argLength: 3, commutative: false}, - {name: "SubMaskedInt32x8", argLength: 3, commutative: false}, - {name: "SubMaskedInt32x16", argLength: 3, commutative: false}, - {name: "SubMaskedInt64x2", argLength: 3, commutative: false}, - {name: "SubMaskedInt64x4", argLength: 3, commutative: false}, - {name: "SubMaskedInt64x8", argLength: 3, commutative: false}, - {name: "SubMaskedUint8x16", argLength: 3, commutative: false}, - {name: "SubMaskedUint8x32", argLength: 3, commutative: false}, - {name: "SubMaskedUint8x64", argLength: 3, commutative: false}, - {name: "SubMaskedUint16x8", argLength: 3, commutative: false}, - {name: "SubMaskedUint16x16", argLength: 3, commutative: false}, - {name: "SubMaskedUint16x32", argLength: 3, commutative: false}, - {name: "SubMaskedUint32x4", argLength: 3, commutative: false}, - {name: "SubMaskedUint32x8", argLength: 3, commutative: false}, - {name: "SubMaskedUint32x16", argLength: 3, commutative: false}, - {name: "SubMaskedUint64x2", argLength: 3, commutative: false}, - {name: "SubMaskedUint64x4", argLength: 3, commutative: false}, - {name: "SubMaskedUint64x8", argLength: 3, commutative: false}, {name: "SubPairsFloat32x4", argLength: 2, commutative: false}, {name: "SubPairsFloat32x8", argLength: 2, commutative: false}, {name: "SubPairsFloat64x2", argLength: 2, commutative: false}, @@ -1584,18 +870,6 @@ func simdGenericOps() []opData { {name: "SubSaturatedInt16x8", argLength: 2, commutative: false}, {name: "SubSaturatedInt16x16", argLength: 2, commutative: false}, {name: "SubSaturatedInt16x32", argLength: 2, commutative: false}, - {name: "SubSaturatedMaskedInt8x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt8x32", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt8x64", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt16x8", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt16x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt16x32", 
argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint8x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint8x32", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint8x64", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint16x8", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint16x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint16x32", argLength: 3, commutative: false}, {name: "SubSaturatedUint8x16", argLength: 2, commutative: false}, {name: "SubSaturatedUint8x32", argLength: 2, commutative: false}, {name: "SubSaturatedUint8x64", argLength: 2, commutative: false}, @@ -1630,18 +904,6 @@ func simdGenericOps() []opData { {name: "XorInt64x2", argLength: 2, commutative: true}, {name: "XorInt64x4", argLength: 2, commutative: true}, {name: "XorInt64x8", argLength: 2, commutative: true}, - {name: "XorMaskedInt32x4", argLength: 3, commutative: true}, - {name: "XorMaskedInt32x8", argLength: 3, commutative: true}, - {name: "XorMaskedInt32x16", argLength: 3, commutative: true}, - {name: "XorMaskedInt64x2", argLength: 3, commutative: true}, - {name: "XorMaskedInt64x4", argLength: 3, commutative: true}, - {name: "XorMaskedInt64x8", argLength: 3, commutative: true}, - {name: "XorMaskedUint32x4", argLength: 3, commutative: true}, - {name: "XorMaskedUint32x8", argLength: 3, commutative: true}, - {name: "XorMaskedUint32x16", argLength: 3, commutative: true}, - {name: "XorMaskedUint64x2", argLength: 3, commutative: true}, - {name: "XorMaskedUint64x4", argLength: 3, commutative: true}, - {name: "XorMaskedUint64x8", argLength: 3, commutative: true}, {name: "XorUint8x16", argLength: 2, commutative: true}, {name: "XorUint8x32", argLength: 2, commutative: true}, {name: "XorUint8x64", argLength: 2, commutative: true}, @@ -1666,57 +928,27 @@ func simdGenericOps() []opData { {name: "CeilScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: 
"CeilScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformInverseMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformInverseMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformInverseMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformInverseUint8x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformInverseUint8x32", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformInverseUint8x64", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1736,18 +968,6 @@ func simdGenericOps() []opData 
{ {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "UInt8"}, @@ -1760,18 +980,6 @@ func simdGenericOps() []opData { {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "UInt8"}, @@ -1784,24 +992,12 @@ func simdGenericOps() []opData { {name: "RoundToEvenScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat32x4", argLength: 2, 
commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, {name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1821,24 +1017,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: 
"ShiftAllLeftConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1857,24 +1035,6 @@ func simdGenericOps() []opData { {name: "ShiftAllRightConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1890,23 +1050,11 @@ func simdGenericOps() []opData { {name: "TruncScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: 
"UInt8"}, - {name: "TruncScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, } } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index b45cccd96bb..9f6e10c95cb 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -4648,36 +4648,15 @@ const ( OpAbsInt64x2 OpAbsInt64x4 OpAbsInt64x8 - OpAbsMaskedInt8x16 - OpAbsMaskedInt8x32 - OpAbsMaskedInt8x64 - OpAbsMaskedInt16x8 - OpAbsMaskedInt16x16 - OpAbsMaskedInt16x32 - OpAbsMaskedInt32x4 - OpAbsMaskedInt32x8 - OpAbsMaskedInt32x16 - OpAbsMaskedInt64x2 - OpAbsMaskedInt64x4 - OpAbsMaskedInt64x8 OpAddDotProdPairsSaturatedInt32x4 OpAddDotProdPairsSaturatedInt32x8 OpAddDotProdPairsSaturatedInt32x16 - OpAddDotProdPairsSaturatedMaskedInt32x4 - OpAddDotProdPairsSaturatedMaskedInt32x8 - OpAddDotProdPairsSaturatedMaskedInt32x16 OpAddDotProdQuadrupleInt32x4 OpAddDotProdQuadrupleInt32x8 OpAddDotProdQuadrupleInt32x16 - OpAddDotProdQuadrupleMaskedInt32x4 - OpAddDotProdQuadrupleMaskedInt32x8 - OpAddDotProdQuadrupleMaskedInt32x16 OpAddDotProdQuadrupleSaturatedInt32x4 OpAddDotProdQuadrupleSaturatedInt32x8 OpAddDotProdQuadrupleSaturatedInt32x16 - OpAddDotProdQuadrupleSaturatedMaskedInt32x4 - OpAddDotProdQuadrupleSaturatedMaskedInt32x8 - OpAddDotProdQuadrupleSaturatedMaskedInt32x16 OpAddFloat32x4 OpAddFloat32x8 OpAddFloat32x16 @@ -4696,36 +4675,6 @@ const ( OpAddInt64x2 OpAddInt64x4 OpAddInt64x8 - OpAddMaskedFloat32x4 - OpAddMaskedFloat32x8 - OpAddMaskedFloat32x16 - OpAddMaskedFloat64x2 - OpAddMaskedFloat64x4 - OpAddMaskedFloat64x8 - OpAddMaskedInt8x16 - OpAddMaskedInt8x32 - OpAddMaskedInt8x64 - OpAddMaskedInt16x8 - OpAddMaskedInt16x16 - OpAddMaskedInt16x32 - OpAddMaskedInt32x4 - OpAddMaskedInt32x8 - OpAddMaskedInt32x16 - OpAddMaskedInt64x2 - OpAddMaskedInt64x4 - OpAddMaskedInt64x8 - OpAddMaskedUint8x16 - OpAddMaskedUint8x32 - OpAddMaskedUint8x64 - OpAddMaskedUint16x8 - OpAddMaskedUint16x16 - OpAddMaskedUint16x32 - OpAddMaskedUint32x4 - OpAddMaskedUint32x8 - OpAddMaskedUint32x16 - OpAddMaskedUint64x2 - OpAddMaskedUint64x4 - OpAddMaskedUint64x8 OpAddPairsFloat32x4 OpAddPairsFloat32x8 OpAddPairsFloat64x2 @@ -4746,18 +4695,6 @@ const ( OpAddSaturatedInt16x8 OpAddSaturatedInt16x16 OpAddSaturatedInt16x32 - OpAddSaturatedMaskedInt8x16 - OpAddSaturatedMaskedInt8x32 - OpAddSaturatedMaskedInt8x64 - 
OpAddSaturatedMaskedInt16x8 - OpAddSaturatedMaskedInt16x16 - OpAddSaturatedMaskedInt16x32 - OpAddSaturatedMaskedUint8x16 - OpAddSaturatedMaskedUint8x32 - OpAddSaturatedMaskedUint8x64 - OpAddSaturatedMaskedUint16x8 - OpAddSaturatedMaskedUint16x16 - OpAddSaturatedMaskedUint16x32 OpAddSaturatedUint8x16 OpAddSaturatedUint8x32 OpAddSaturatedUint8x64 @@ -4792,18 +4729,6 @@ const ( OpAndInt64x2 OpAndInt64x4 OpAndInt64x8 - OpAndMaskedInt32x4 - OpAndMaskedInt32x8 - OpAndMaskedInt32x16 - OpAndMaskedInt64x2 - OpAndMaskedInt64x4 - OpAndMaskedInt64x8 - OpAndMaskedUint32x4 - OpAndMaskedUint32x8 - OpAndMaskedUint32x16 - OpAndMaskedUint64x2 - OpAndMaskedUint64x4 - OpAndMaskedUint64x8 OpAndNotInt8x16 OpAndNotInt8x32 OpAndNotInt8x64 @@ -4816,18 +4741,6 @@ const ( OpAndNotInt64x2 OpAndNotInt64x4 OpAndNotInt64x8 - OpAndNotMaskedInt32x4 - OpAndNotMaskedInt32x8 - OpAndNotMaskedInt32x16 - OpAndNotMaskedInt64x2 - OpAndNotMaskedInt64x4 - OpAndNotMaskedInt64x8 - OpAndNotMaskedUint32x4 - OpAndNotMaskedUint32x8 - OpAndNotMaskedUint32x16 - OpAndNotMaskedUint64x2 - OpAndNotMaskedUint64x4 - OpAndNotMaskedUint64x8 OpAndNotUint8x16 OpAndNotUint8x32 OpAndNotUint8x64 @@ -4852,12 +4765,6 @@ const ( OpAndUint64x2 OpAndUint64x4 OpAndUint64x8 - OpAverageMaskedUint8x16 - OpAverageMaskedUint8x32 - OpAverageMaskedUint8x64 - OpAverageMaskedUint16x8 - OpAverageMaskedUint16x16 - OpAverageMaskedUint16x32 OpAverageUint8x16 OpAverageUint8x32 OpAverageUint8x64 @@ -4870,16 +4777,6 @@ const ( OpBroadcast128Int16x8 OpBroadcast128Int32x4 OpBroadcast128Int64x2 - OpBroadcast128MaskedFloat32x4 - OpBroadcast128MaskedFloat64x2 - OpBroadcast128MaskedInt8x16 - OpBroadcast128MaskedInt16x8 - OpBroadcast128MaskedInt32x4 - OpBroadcast128MaskedInt64x2 - OpBroadcast128MaskedUint8x16 - OpBroadcast128MaskedUint16x8 - OpBroadcast128MaskedUint32x4 - OpBroadcast128MaskedUint64x2 OpBroadcast128Uint8x16 OpBroadcast128Uint16x8 OpBroadcast128Uint32x4 @@ -4890,16 +4787,6 @@ const ( OpBroadcast256Int16x8 OpBroadcast256Int32x4 OpBroadcast256Int64x2 - OpBroadcast256MaskedFloat32x4 - OpBroadcast256MaskedFloat64x2 - OpBroadcast256MaskedInt8x16 - OpBroadcast256MaskedInt16x8 - OpBroadcast256MaskedInt32x4 - OpBroadcast256MaskedInt64x2 - OpBroadcast256MaskedUint8x16 - OpBroadcast256MaskedUint16x8 - OpBroadcast256MaskedUint32x4 - OpBroadcast256MaskedUint64x2 OpBroadcast256Uint8x16 OpBroadcast256Uint16x8 OpBroadcast256Uint32x4 @@ -4910,16 +4797,6 @@ const ( OpBroadcast512Int16x8 OpBroadcast512Int32x4 OpBroadcast512Int64x2 - OpBroadcast512MaskedFloat32x4 - OpBroadcast512MaskedFloat64x2 - OpBroadcast512MaskedInt8x16 - OpBroadcast512MaskedInt16x8 - OpBroadcast512MaskedInt32x4 - OpBroadcast512MaskedInt64x2 - OpBroadcast512MaskedUint8x16 - OpBroadcast512MaskedUint16x8 - OpBroadcast512MaskedUint32x4 - OpBroadcast512MaskedUint64x2 OpBroadcast512Uint8x16 OpBroadcast512Uint16x8 OpBroadcast512Uint32x4 @@ -4961,15 +4838,9 @@ const ( OpConvertToInt32Float32x4 OpConvertToInt32Float32x8 OpConvertToInt32Float32x16 - OpConvertToInt32MaskedFloat32x4 - OpConvertToInt32MaskedFloat32x8 - OpConvertToInt32MaskedFloat32x16 OpConvertToUint32Float32x4 OpConvertToUint32Float32x8 OpConvertToUint32Float32x16 - OpConvertToUint32MaskedFloat32x4 - OpConvertToUint32MaskedFloat32x8 - OpConvertToUint32MaskedFloat32x16 OpCopySignInt8x16 OpCopySignInt8x32 OpCopySignInt16x8 @@ -4982,21 +4853,9 @@ const ( OpDivFloat64x2 OpDivFloat64x4 OpDivFloat64x8 - OpDivMaskedFloat32x4 - OpDivMaskedFloat32x8 - OpDivMaskedFloat32x16 - OpDivMaskedFloat64x2 - OpDivMaskedFloat64x4 - OpDivMaskedFloat64x8 OpDotProdPairsInt16x8 
OpDotProdPairsInt16x16 OpDotProdPairsInt16x32 - OpDotProdPairsMaskedInt16x8 - OpDotProdPairsMaskedInt16x16 - OpDotProdPairsMaskedInt16x32 - OpDotProdPairsSaturatedMaskedUint8x16 - OpDotProdPairsSaturatedMaskedUint8x32 - OpDotProdPairsSaturatedMaskedUint8x64 OpDotProdPairsSaturatedUint8x16 OpDotProdPairsSaturatedUint8x32 OpDotProdPairsSaturatedUint8x64 @@ -5018,36 +4877,6 @@ const ( OpEqualInt64x2 OpEqualInt64x4 OpEqualInt64x8 - OpEqualMaskedFloat32x4 - OpEqualMaskedFloat32x8 - OpEqualMaskedFloat32x16 - OpEqualMaskedFloat64x2 - OpEqualMaskedFloat64x4 - OpEqualMaskedFloat64x8 - OpEqualMaskedInt8x16 - OpEqualMaskedInt8x32 - OpEqualMaskedInt8x64 - OpEqualMaskedInt16x8 - OpEqualMaskedInt16x16 - OpEqualMaskedInt16x32 - OpEqualMaskedInt32x4 - OpEqualMaskedInt32x8 - OpEqualMaskedInt32x16 - OpEqualMaskedInt64x2 - OpEqualMaskedInt64x4 - OpEqualMaskedInt64x8 - OpEqualMaskedUint8x16 - OpEqualMaskedUint8x32 - OpEqualMaskedUint8x64 - OpEqualMaskedUint16x8 - OpEqualMaskedUint16x16 - OpEqualMaskedUint16x32 - OpEqualMaskedUint32x4 - OpEqualMaskedUint32x8 - OpEqualMaskedUint32x16 - OpEqualMaskedUint64x2 - OpEqualMaskedUint64x4 - OpEqualMaskedUint64x8 OpEqualUint8x16 OpEqualUint8x32 OpEqualUint8x64 @@ -5094,9 +4923,6 @@ const ( OpFloorFloat32x8 OpFloorFloat64x2 OpFloorFloat64x4 - OpGaloisFieldMulMaskedUint8x16 - OpGaloisFieldMulMaskedUint8x32 - OpGaloisFieldMulMaskedUint8x64 OpGaloisFieldMulUint8x16 OpGaloisFieldMulUint8x32 OpGaloisFieldMulUint8x64 @@ -5150,36 +4976,6 @@ const ( OpGreaterEqualInt16x32 OpGreaterEqualInt32x16 OpGreaterEqualInt64x8 - OpGreaterEqualMaskedFloat32x4 - OpGreaterEqualMaskedFloat32x8 - OpGreaterEqualMaskedFloat32x16 - OpGreaterEqualMaskedFloat64x2 - OpGreaterEqualMaskedFloat64x4 - OpGreaterEqualMaskedFloat64x8 - OpGreaterEqualMaskedInt8x16 - OpGreaterEqualMaskedInt8x32 - OpGreaterEqualMaskedInt8x64 - OpGreaterEqualMaskedInt16x8 - OpGreaterEqualMaskedInt16x16 - OpGreaterEqualMaskedInt16x32 - OpGreaterEqualMaskedInt32x4 - OpGreaterEqualMaskedInt32x8 - OpGreaterEqualMaskedInt32x16 - OpGreaterEqualMaskedInt64x2 - OpGreaterEqualMaskedInt64x4 - OpGreaterEqualMaskedInt64x8 - OpGreaterEqualMaskedUint8x16 - OpGreaterEqualMaskedUint8x32 - OpGreaterEqualMaskedUint8x64 - OpGreaterEqualMaskedUint16x8 - OpGreaterEqualMaskedUint16x16 - OpGreaterEqualMaskedUint16x32 - OpGreaterEqualMaskedUint32x4 - OpGreaterEqualMaskedUint32x8 - OpGreaterEqualMaskedUint32x16 - OpGreaterEqualMaskedUint64x2 - OpGreaterEqualMaskedUint64x4 - OpGreaterEqualMaskedUint64x8 OpGreaterEqualUint8x64 OpGreaterEqualUint16x32 OpGreaterEqualUint32x16 @@ -5202,36 +4998,6 @@ const ( OpGreaterInt64x2 OpGreaterInt64x4 OpGreaterInt64x8 - OpGreaterMaskedFloat32x4 - OpGreaterMaskedFloat32x8 - OpGreaterMaskedFloat32x16 - OpGreaterMaskedFloat64x2 - OpGreaterMaskedFloat64x4 - OpGreaterMaskedFloat64x8 - OpGreaterMaskedInt8x16 - OpGreaterMaskedInt8x32 - OpGreaterMaskedInt8x64 - OpGreaterMaskedInt16x8 - OpGreaterMaskedInt16x16 - OpGreaterMaskedInt16x32 - OpGreaterMaskedInt32x4 - OpGreaterMaskedInt32x8 - OpGreaterMaskedInt32x16 - OpGreaterMaskedInt64x2 - OpGreaterMaskedInt64x4 - OpGreaterMaskedInt64x8 - OpGreaterMaskedUint8x16 - OpGreaterMaskedUint8x32 - OpGreaterMaskedUint8x64 - OpGreaterMaskedUint16x8 - OpGreaterMaskedUint16x16 - OpGreaterMaskedUint16x32 - OpGreaterMaskedUint32x4 - OpGreaterMaskedUint32x8 - OpGreaterMaskedUint32x16 - OpGreaterMaskedUint64x2 - OpGreaterMaskedUint64x4 - OpGreaterMaskedUint64x8 OpGreaterUint8x64 OpGreaterUint16x32 OpGreaterUint32x16 @@ -5242,12 +5008,6 @@ const ( OpIsNanFloat64x2 OpIsNanFloat64x4 
OpIsNanFloat64x8 - OpIsNanMaskedFloat32x4 - OpIsNanMaskedFloat32x8 - OpIsNanMaskedFloat32x16 - OpIsNanMaskedFloat64x2 - OpIsNanMaskedFloat64x4 - OpIsNanMaskedFloat64x8 OpLessEqualFloat32x4 OpLessEqualFloat32x8 OpLessEqualFloat32x16 @@ -5258,36 +5018,6 @@ const ( OpLessEqualInt16x32 OpLessEqualInt32x16 OpLessEqualInt64x8 - OpLessEqualMaskedFloat32x4 - OpLessEqualMaskedFloat32x8 - OpLessEqualMaskedFloat32x16 - OpLessEqualMaskedFloat64x2 - OpLessEqualMaskedFloat64x4 - OpLessEqualMaskedFloat64x8 - OpLessEqualMaskedInt8x16 - OpLessEqualMaskedInt8x32 - OpLessEqualMaskedInt8x64 - OpLessEqualMaskedInt16x8 - OpLessEqualMaskedInt16x16 - OpLessEqualMaskedInt16x32 - OpLessEqualMaskedInt32x4 - OpLessEqualMaskedInt32x8 - OpLessEqualMaskedInt32x16 - OpLessEqualMaskedInt64x2 - OpLessEqualMaskedInt64x4 - OpLessEqualMaskedInt64x8 - OpLessEqualMaskedUint8x16 - OpLessEqualMaskedUint8x32 - OpLessEqualMaskedUint8x64 - OpLessEqualMaskedUint16x8 - OpLessEqualMaskedUint16x16 - OpLessEqualMaskedUint16x32 - OpLessEqualMaskedUint32x4 - OpLessEqualMaskedUint32x8 - OpLessEqualMaskedUint32x16 - OpLessEqualMaskedUint64x2 - OpLessEqualMaskedUint64x4 - OpLessEqualMaskedUint64x8 OpLessEqualUint8x64 OpLessEqualUint16x32 OpLessEqualUint32x16 @@ -5302,36 +5032,6 @@ const ( OpLessInt16x32 OpLessInt32x16 OpLessInt64x8 - OpLessMaskedFloat32x4 - OpLessMaskedFloat32x8 - OpLessMaskedFloat32x16 - OpLessMaskedFloat64x2 - OpLessMaskedFloat64x4 - OpLessMaskedFloat64x8 - OpLessMaskedInt8x16 - OpLessMaskedInt8x32 - OpLessMaskedInt8x64 - OpLessMaskedInt16x8 - OpLessMaskedInt16x16 - OpLessMaskedInt16x32 - OpLessMaskedInt32x4 - OpLessMaskedInt32x8 - OpLessMaskedInt32x16 - OpLessMaskedInt64x2 - OpLessMaskedInt64x4 - OpLessMaskedInt64x8 - OpLessMaskedUint8x16 - OpLessMaskedUint8x32 - OpLessMaskedUint8x64 - OpLessMaskedUint16x8 - OpLessMaskedUint16x16 - OpLessMaskedUint16x32 - OpLessMaskedUint32x4 - OpLessMaskedUint32x8 - OpLessMaskedUint32x16 - OpLessMaskedUint64x2 - OpLessMaskedUint64x4 - OpLessMaskedUint64x8 OpLessUint8x64 OpLessUint16x32 OpLessUint32x16 @@ -5354,36 +5054,6 @@ const ( OpMaxInt64x2 OpMaxInt64x4 OpMaxInt64x8 - OpMaxMaskedFloat32x4 - OpMaxMaskedFloat32x8 - OpMaxMaskedFloat32x16 - OpMaxMaskedFloat64x2 - OpMaxMaskedFloat64x4 - OpMaxMaskedFloat64x8 - OpMaxMaskedInt8x16 - OpMaxMaskedInt8x32 - OpMaxMaskedInt8x64 - OpMaxMaskedInt16x8 - OpMaxMaskedInt16x16 - OpMaxMaskedInt16x32 - OpMaxMaskedInt32x4 - OpMaxMaskedInt32x8 - OpMaxMaskedInt32x16 - OpMaxMaskedInt64x2 - OpMaxMaskedInt64x4 - OpMaxMaskedInt64x8 - OpMaxMaskedUint8x16 - OpMaxMaskedUint8x32 - OpMaxMaskedUint8x64 - OpMaxMaskedUint16x8 - OpMaxMaskedUint16x16 - OpMaxMaskedUint16x32 - OpMaxMaskedUint32x4 - OpMaxMaskedUint32x8 - OpMaxMaskedUint32x16 - OpMaxMaskedUint64x2 - OpMaxMaskedUint64x4 - OpMaxMaskedUint64x8 OpMaxUint8x16 OpMaxUint8x32 OpMaxUint8x64 @@ -5414,36 +5084,6 @@ const ( OpMinInt64x2 OpMinInt64x4 OpMinInt64x8 - OpMinMaskedFloat32x4 - OpMinMaskedFloat32x8 - OpMinMaskedFloat32x16 - OpMinMaskedFloat64x2 - OpMinMaskedFloat64x4 - OpMinMaskedFloat64x8 - OpMinMaskedInt8x16 - OpMinMaskedInt8x32 - OpMinMaskedInt8x64 - OpMinMaskedInt16x8 - OpMinMaskedInt16x16 - OpMinMaskedInt16x32 - OpMinMaskedInt32x4 - OpMinMaskedInt32x8 - OpMinMaskedInt32x16 - OpMinMaskedInt64x2 - OpMinMaskedInt64x4 - OpMinMaskedInt64x8 - OpMinMaskedUint8x16 - OpMinMaskedUint8x32 - OpMinMaskedUint8x64 - OpMinMaskedUint16x8 - OpMinMaskedUint16x16 - OpMinMaskedUint16x32 - OpMinMaskedUint32x4 - OpMinMaskedUint32x8 - OpMinMaskedUint32x16 - OpMinMaskedUint64x2 - OpMinMaskedUint64x4 - OpMinMaskedUint64x8 
OpMinUint8x16 OpMinUint8x32 OpMinUint8x64 @@ -5462,24 +5102,12 @@ const ( OpMulAddFloat64x2 OpMulAddFloat64x4 OpMulAddFloat64x8 - OpMulAddMaskedFloat32x4 - OpMulAddMaskedFloat32x8 - OpMulAddMaskedFloat32x16 - OpMulAddMaskedFloat64x2 - OpMulAddMaskedFloat64x4 - OpMulAddMaskedFloat64x8 OpMulAddSubFloat32x4 OpMulAddSubFloat32x8 OpMulAddSubFloat32x16 OpMulAddSubFloat64x2 OpMulAddSubFloat64x4 OpMulAddSubFloat64x8 - OpMulAddSubMaskedFloat32x4 - OpMulAddSubMaskedFloat32x8 - OpMulAddSubMaskedFloat32x16 - OpMulAddSubMaskedFloat64x2 - OpMulAddSubMaskedFloat64x4 - OpMulAddSubMaskedFloat64x8 OpMulEvenWidenInt32x4 OpMulEvenWidenInt32x8 OpMulEvenWidenUint32x4 @@ -5493,12 +5121,6 @@ const ( OpMulHighInt16x8 OpMulHighInt16x16 OpMulHighInt16x32 - OpMulHighMaskedInt16x8 - OpMulHighMaskedInt16x16 - OpMulHighMaskedInt16x32 - OpMulHighMaskedUint16x8 - OpMulHighMaskedUint16x16 - OpMulHighMaskedUint16x32 OpMulHighUint16x8 OpMulHighUint16x16 OpMulHighUint16x32 @@ -5511,42 +5133,12 @@ const ( OpMulInt64x2 OpMulInt64x4 OpMulInt64x8 - OpMulMaskedFloat32x4 - OpMulMaskedFloat32x8 - OpMulMaskedFloat32x16 - OpMulMaskedFloat64x2 - OpMulMaskedFloat64x4 - OpMulMaskedFloat64x8 - OpMulMaskedInt16x8 - OpMulMaskedInt16x16 - OpMulMaskedInt16x32 - OpMulMaskedInt32x4 - OpMulMaskedInt32x8 - OpMulMaskedInt32x16 - OpMulMaskedInt64x2 - OpMulMaskedInt64x4 - OpMulMaskedInt64x8 - OpMulMaskedUint16x8 - OpMulMaskedUint16x16 - OpMulMaskedUint16x32 - OpMulMaskedUint32x4 - OpMulMaskedUint32x8 - OpMulMaskedUint32x16 - OpMulMaskedUint64x2 - OpMulMaskedUint64x4 - OpMulMaskedUint64x8 OpMulSubAddFloat32x4 OpMulSubAddFloat32x8 OpMulSubAddFloat32x16 OpMulSubAddFloat64x2 OpMulSubAddFloat64x4 OpMulSubAddFloat64x8 - OpMulSubAddMaskedFloat32x4 - OpMulSubAddMaskedFloat32x8 - OpMulSubAddMaskedFloat32x16 - OpMulSubAddMaskedFloat64x2 - OpMulSubAddMaskedFloat64x4 - OpMulSubAddMaskedFloat64x8 OpMulUint16x8 OpMulUint16x16 OpMulUint16x32 @@ -5566,36 +5158,6 @@ const ( OpNotEqualInt16x32 OpNotEqualInt32x16 OpNotEqualInt64x8 - OpNotEqualMaskedFloat32x4 - OpNotEqualMaskedFloat32x8 - OpNotEqualMaskedFloat32x16 - OpNotEqualMaskedFloat64x2 - OpNotEqualMaskedFloat64x4 - OpNotEqualMaskedFloat64x8 - OpNotEqualMaskedInt8x16 - OpNotEqualMaskedInt8x32 - OpNotEqualMaskedInt8x64 - OpNotEqualMaskedInt16x8 - OpNotEqualMaskedInt16x16 - OpNotEqualMaskedInt16x32 - OpNotEqualMaskedInt32x4 - OpNotEqualMaskedInt32x8 - OpNotEqualMaskedInt32x16 - OpNotEqualMaskedInt64x2 - OpNotEqualMaskedInt64x4 - OpNotEqualMaskedInt64x8 - OpNotEqualMaskedUint8x16 - OpNotEqualMaskedUint8x32 - OpNotEqualMaskedUint8x64 - OpNotEqualMaskedUint16x8 - OpNotEqualMaskedUint16x16 - OpNotEqualMaskedUint16x32 - OpNotEqualMaskedUint32x4 - OpNotEqualMaskedUint32x8 - OpNotEqualMaskedUint32x16 - OpNotEqualMaskedUint64x2 - OpNotEqualMaskedUint64x4 - OpNotEqualMaskedUint64x8 OpNotEqualUint8x64 OpNotEqualUint16x32 OpNotEqualUint32x16 @@ -5612,30 +5174,6 @@ const ( OpOnesCountInt64x2 OpOnesCountInt64x4 OpOnesCountInt64x8 - OpOnesCountMaskedInt8x16 - OpOnesCountMaskedInt8x32 - OpOnesCountMaskedInt8x64 - OpOnesCountMaskedInt16x8 - OpOnesCountMaskedInt16x16 - OpOnesCountMaskedInt16x32 - OpOnesCountMaskedInt32x4 - OpOnesCountMaskedInt32x8 - OpOnesCountMaskedInt32x16 - OpOnesCountMaskedInt64x2 - OpOnesCountMaskedInt64x4 - OpOnesCountMaskedInt64x8 - OpOnesCountMaskedUint8x16 - OpOnesCountMaskedUint8x32 - OpOnesCountMaskedUint8x64 - OpOnesCountMaskedUint16x8 - OpOnesCountMaskedUint16x16 - OpOnesCountMaskedUint16x32 - OpOnesCountMaskedUint32x4 - OpOnesCountMaskedUint32x8 - OpOnesCountMaskedUint32x16 - OpOnesCountMaskedUint64x2 
- OpOnesCountMaskedUint64x4 - OpOnesCountMaskedUint64x8 OpOnesCountUint8x16 OpOnesCountUint8x32 OpOnesCountUint8x64 @@ -5660,18 +5198,6 @@ const ( OpOrInt64x2 OpOrInt64x4 OpOrInt64x8 - OpOrMaskedInt32x4 - OpOrMaskedInt32x8 - OpOrMaskedInt32x16 - OpOrMaskedInt64x2 - OpOrMaskedInt64x4 - OpOrMaskedInt64x8 - OpOrMaskedUint32x4 - OpOrMaskedUint32x8 - OpOrMaskedUint32x16 - OpOrMaskedUint64x2 - OpOrMaskedUint64x4 - OpOrMaskedUint64x8 OpOrUint8x16 OpOrUint8x32 OpOrUint8x64 @@ -5702,36 +5228,6 @@ const ( OpPermute2Int64x2 OpPermute2Int64x4 OpPermute2Int64x8 - OpPermute2MaskedFloat32x4 - OpPermute2MaskedFloat32x8 - OpPermute2MaskedFloat32x16 - OpPermute2MaskedFloat64x2 - OpPermute2MaskedFloat64x4 - OpPermute2MaskedFloat64x8 - OpPermute2MaskedInt8x16 - OpPermute2MaskedInt8x32 - OpPermute2MaskedInt8x64 - OpPermute2MaskedInt16x8 - OpPermute2MaskedInt16x16 - OpPermute2MaskedInt16x32 - OpPermute2MaskedInt32x4 - OpPermute2MaskedInt32x8 - OpPermute2MaskedInt32x16 - OpPermute2MaskedInt64x2 - OpPermute2MaskedInt64x4 - OpPermute2MaskedInt64x8 - OpPermute2MaskedUint8x16 - OpPermute2MaskedUint8x32 - OpPermute2MaskedUint8x64 - OpPermute2MaskedUint16x8 - OpPermute2MaskedUint16x16 - OpPermute2MaskedUint16x32 - OpPermute2MaskedUint32x4 - OpPermute2MaskedUint32x8 - OpPermute2MaskedUint32x16 - OpPermute2MaskedUint64x2 - OpPermute2MaskedUint64x4 - OpPermute2MaskedUint64x8 OpPermute2Uint8x16 OpPermute2Uint8x32 OpPermute2Uint8x64 @@ -5758,30 +5254,6 @@ const ( OpPermuteInt32x16 OpPermuteInt64x4 OpPermuteInt64x8 - OpPermuteMaskedFloat32x8 - OpPermuteMaskedFloat32x16 - OpPermuteMaskedFloat64x4 - OpPermuteMaskedFloat64x8 - OpPermuteMaskedInt8x16 - OpPermuteMaskedInt8x32 - OpPermuteMaskedInt8x64 - OpPermuteMaskedInt16x8 - OpPermuteMaskedInt16x16 - OpPermuteMaskedInt16x32 - OpPermuteMaskedInt32x8 - OpPermuteMaskedInt32x16 - OpPermuteMaskedInt64x4 - OpPermuteMaskedInt64x8 - OpPermuteMaskedUint8x16 - OpPermuteMaskedUint8x32 - OpPermuteMaskedUint8x64 - OpPermuteMaskedUint16x8 - OpPermuteMaskedUint16x16 - OpPermuteMaskedUint16x32 - OpPermuteMaskedUint32x8 - OpPermuteMaskedUint32x16 - OpPermuteMaskedUint64x4 - OpPermuteMaskedUint64x8 OpPermuteUint8x16 OpPermuteUint8x32 OpPermuteUint8x64 @@ -5798,42 +5270,18 @@ const ( OpReciprocalFloat64x2 OpReciprocalFloat64x4 OpReciprocalFloat64x8 - OpReciprocalMaskedFloat32x4 - OpReciprocalMaskedFloat32x8 - OpReciprocalMaskedFloat32x16 - OpReciprocalMaskedFloat64x2 - OpReciprocalMaskedFloat64x4 - OpReciprocalMaskedFloat64x8 OpReciprocalSqrtFloat32x4 OpReciprocalSqrtFloat32x8 OpReciprocalSqrtFloat32x16 OpReciprocalSqrtFloat64x2 OpReciprocalSqrtFloat64x4 OpReciprocalSqrtFloat64x8 - OpReciprocalSqrtMaskedFloat32x4 - OpReciprocalSqrtMaskedFloat32x8 - OpReciprocalSqrtMaskedFloat32x16 - OpReciprocalSqrtMaskedFloat64x2 - OpReciprocalSqrtMaskedFloat64x4 - OpReciprocalSqrtMaskedFloat64x8 OpRotateLeftInt32x4 OpRotateLeftInt32x8 OpRotateLeftInt32x16 OpRotateLeftInt64x2 OpRotateLeftInt64x4 OpRotateLeftInt64x8 - OpRotateLeftMaskedInt32x4 - OpRotateLeftMaskedInt32x8 - OpRotateLeftMaskedInt32x16 - OpRotateLeftMaskedInt64x2 - OpRotateLeftMaskedInt64x4 - OpRotateLeftMaskedInt64x8 - OpRotateLeftMaskedUint32x4 - OpRotateLeftMaskedUint32x8 - OpRotateLeftMaskedUint32x16 - OpRotateLeftMaskedUint64x2 - OpRotateLeftMaskedUint64x4 - OpRotateLeftMaskedUint64x8 OpRotateLeftUint32x4 OpRotateLeftUint32x8 OpRotateLeftUint32x16 @@ -5846,18 +5294,6 @@ const ( OpRotateRightInt64x2 OpRotateRightInt64x4 OpRotateRightInt64x8 - OpRotateRightMaskedInt32x4 - OpRotateRightMaskedInt32x8 - OpRotateRightMaskedInt32x16 - 
OpRotateRightMaskedInt64x2 - OpRotateRightMaskedInt64x4 - OpRotateRightMaskedInt64x8 - OpRotateRightMaskedUint32x4 - OpRotateRightMaskedUint32x8 - OpRotateRightMaskedUint32x16 - OpRotateRightMaskedUint64x2 - OpRotateRightMaskedUint64x4 - OpRotateRightMaskedUint64x8 OpRotateRightUint32x4 OpRotateRightUint32x8 OpRotateRightUint32x16 @@ -5874,12 +5310,6 @@ const ( OpScaleFloat64x2 OpScaleFloat64x4 OpScaleFloat64x8 - OpScaleMaskedFloat32x4 - OpScaleMaskedFloat32x8 - OpScaleMaskedFloat32x16 - OpScaleMaskedFloat64x2 - OpScaleMaskedFloat64x4 - OpScaleMaskedFloat64x8 OpSetHiFloat32x8 OpSetHiFloat32x16 OpSetHiFloat64x4 @@ -5929,24 +5359,6 @@ const ( OpShiftAllLeftInt64x2 OpShiftAllLeftInt64x4 OpShiftAllLeftInt64x8 - OpShiftAllLeftMaskedInt16x8 - OpShiftAllLeftMaskedInt16x16 - OpShiftAllLeftMaskedInt16x32 - OpShiftAllLeftMaskedInt32x4 - OpShiftAllLeftMaskedInt32x8 - OpShiftAllLeftMaskedInt32x16 - OpShiftAllLeftMaskedInt64x2 - OpShiftAllLeftMaskedInt64x4 - OpShiftAllLeftMaskedInt64x8 - OpShiftAllLeftMaskedUint16x8 - OpShiftAllLeftMaskedUint16x16 - OpShiftAllLeftMaskedUint16x32 - OpShiftAllLeftMaskedUint32x4 - OpShiftAllLeftMaskedUint32x8 - OpShiftAllLeftMaskedUint32x16 - OpShiftAllLeftMaskedUint64x2 - OpShiftAllLeftMaskedUint64x4 - OpShiftAllLeftMaskedUint64x8 OpShiftAllLeftUint16x8 OpShiftAllLeftUint16x16 OpShiftAllLeftUint16x32 @@ -5965,24 +5377,6 @@ const ( OpShiftAllRightInt64x2 OpShiftAllRightInt64x4 OpShiftAllRightInt64x8 - OpShiftAllRightMaskedInt16x8 - OpShiftAllRightMaskedInt16x16 - OpShiftAllRightMaskedInt16x32 - OpShiftAllRightMaskedInt32x4 - OpShiftAllRightMaskedInt32x8 - OpShiftAllRightMaskedInt32x16 - OpShiftAllRightMaskedInt64x2 - OpShiftAllRightMaskedInt64x4 - OpShiftAllRightMaskedInt64x8 - OpShiftAllRightMaskedUint16x8 - OpShiftAllRightMaskedUint16x16 - OpShiftAllRightMaskedUint16x32 - OpShiftAllRightMaskedUint32x4 - OpShiftAllRightMaskedUint32x8 - OpShiftAllRightMaskedUint32x16 - OpShiftAllRightMaskedUint64x2 - OpShiftAllRightMaskedUint64x4 - OpShiftAllRightMaskedUint64x8 OpShiftAllRightUint16x8 OpShiftAllRightUint16x16 OpShiftAllRightUint16x32 @@ -6001,24 +5395,6 @@ const ( OpShiftLeftConcatInt64x2 OpShiftLeftConcatInt64x4 OpShiftLeftConcatInt64x8 - OpShiftLeftConcatMaskedInt16x8 - OpShiftLeftConcatMaskedInt16x16 - OpShiftLeftConcatMaskedInt16x32 - OpShiftLeftConcatMaskedInt32x4 - OpShiftLeftConcatMaskedInt32x8 - OpShiftLeftConcatMaskedInt32x16 - OpShiftLeftConcatMaskedInt64x2 - OpShiftLeftConcatMaskedInt64x4 - OpShiftLeftConcatMaskedInt64x8 - OpShiftLeftConcatMaskedUint16x8 - OpShiftLeftConcatMaskedUint16x16 - OpShiftLeftConcatMaskedUint16x32 - OpShiftLeftConcatMaskedUint32x4 - OpShiftLeftConcatMaskedUint32x8 - OpShiftLeftConcatMaskedUint32x16 - OpShiftLeftConcatMaskedUint64x2 - OpShiftLeftConcatMaskedUint64x4 - OpShiftLeftConcatMaskedUint64x8 OpShiftLeftConcatUint16x8 OpShiftLeftConcatUint16x16 OpShiftLeftConcatUint16x32 @@ -6037,24 +5413,6 @@ const ( OpShiftLeftInt64x2 OpShiftLeftInt64x4 OpShiftLeftInt64x8 - OpShiftLeftMaskedInt16x8 - OpShiftLeftMaskedInt16x16 - OpShiftLeftMaskedInt16x32 - OpShiftLeftMaskedInt32x4 - OpShiftLeftMaskedInt32x8 - OpShiftLeftMaskedInt32x16 - OpShiftLeftMaskedInt64x2 - OpShiftLeftMaskedInt64x4 - OpShiftLeftMaskedInt64x8 - OpShiftLeftMaskedUint16x8 - OpShiftLeftMaskedUint16x16 - OpShiftLeftMaskedUint16x32 - OpShiftLeftMaskedUint32x4 - OpShiftLeftMaskedUint32x8 - OpShiftLeftMaskedUint32x16 - OpShiftLeftMaskedUint64x2 - OpShiftLeftMaskedUint64x4 - OpShiftLeftMaskedUint64x8 OpShiftLeftUint16x8 OpShiftLeftUint16x16 OpShiftLeftUint16x32 @@ -6073,24 +5431,6 
@@ const ( OpShiftRightConcatInt64x2 OpShiftRightConcatInt64x4 OpShiftRightConcatInt64x8 - OpShiftRightConcatMaskedInt16x8 - OpShiftRightConcatMaskedInt16x16 - OpShiftRightConcatMaskedInt16x32 - OpShiftRightConcatMaskedInt32x4 - OpShiftRightConcatMaskedInt32x8 - OpShiftRightConcatMaskedInt32x16 - OpShiftRightConcatMaskedInt64x2 - OpShiftRightConcatMaskedInt64x4 - OpShiftRightConcatMaskedInt64x8 - OpShiftRightConcatMaskedUint16x8 - OpShiftRightConcatMaskedUint16x16 - OpShiftRightConcatMaskedUint16x32 - OpShiftRightConcatMaskedUint32x4 - OpShiftRightConcatMaskedUint32x8 - OpShiftRightConcatMaskedUint32x16 - OpShiftRightConcatMaskedUint64x2 - OpShiftRightConcatMaskedUint64x4 - OpShiftRightConcatMaskedUint64x8 OpShiftRightConcatUint16x8 OpShiftRightConcatUint16x16 OpShiftRightConcatUint16x32 @@ -6109,24 +5449,6 @@ const ( OpShiftRightInt64x2 OpShiftRightInt64x4 OpShiftRightInt64x8 - OpShiftRightMaskedInt16x8 - OpShiftRightMaskedInt16x16 - OpShiftRightMaskedInt16x32 - OpShiftRightMaskedInt32x4 - OpShiftRightMaskedInt32x8 - OpShiftRightMaskedInt32x16 - OpShiftRightMaskedInt64x2 - OpShiftRightMaskedInt64x4 - OpShiftRightMaskedInt64x8 - OpShiftRightMaskedUint16x8 - OpShiftRightMaskedUint16x16 - OpShiftRightMaskedUint16x32 - OpShiftRightMaskedUint32x4 - OpShiftRightMaskedUint32x8 - OpShiftRightMaskedUint32x16 - OpShiftRightMaskedUint64x2 - OpShiftRightMaskedUint64x4 - OpShiftRightMaskedUint64x8 OpShiftRightUint16x8 OpShiftRightUint16x16 OpShiftRightUint16x32 @@ -6142,12 +5464,6 @@ const ( OpSqrtFloat64x2 OpSqrtFloat64x4 OpSqrtFloat64x8 - OpSqrtMaskedFloat32x4 - OpSqrtMaskedFloat32x8 - OpSqrtMaskedFloat32x16 - OpSqrtMaskedFloat64x2 - OpSqrtMaskedFloat64x4 - OpSqrtMaskedFloat64x8 OpSubFloat32x4 OpSubFloat32x8 OpSubFloat32x16 @@ -6166,36 +5482,6 @@ const ( OpSubInt64x2 OpSubInt64x4 OpSubInt64x8 - OpSubMaskedFloat32x4 - OpSubMaskedFloat32x8 - OpSubMaskedFloat32x16 - OpSubMaskedFloat64x2 - OpSubMaskedFloat64x4 - OpSubMaskedFloat64x8 - OpSubMaskedInt8x16 - OpSubMaskedInt8x32 - OpSubMaskedInt8x64 - OpSubMaskedInt16x8 - OpSubMaskedInt16x16 - OpSubMaskedInt16x32 - OpSubMaskedInt32x4 - OpSubMaskedInt32x8 - OpSubMaskedInt32x16 - OpSubMaskedInt64x2 - OpSubMaskedInt64x4 - OpSubMaskedInt64x8 - OpSubMaskedUint8x16 - OpSubMaskedUint8x32 - OpSubMaskedUint8x64 - OpSubMaskedUint16x8 - OpSubMaskedUint16x16 - OpSubMaskedUint16x32 - OpSubMaskedUint32x4 - OpSubMaskedUint32x8 - OpSubMaskedUint32x16 - OpSubMaskedUint64x2 - OpSubMaskedUint64x4 - OpSubMaskedUint64x8 OpSubPairsFloat32x4 OpSubPairsFloat32x8 OpSubPairsFloat64x2 @@ -6216,18 +5502,6 @@ const ( OpSubSaturatedInt16x8 OpSubSaturatedInt16x16 OpSubSaturatedInt16x32 - OpSubSaturatedMaskedInt8x16 - OpSubSaturatedMaskedInt8x32 - OpSubSaturatedMaskedInt8x64 - OpSubSaturatedMaskedInt16x8 - OpSubSaturatedMaskedInt16x16 - OpSubSaturatedMaskedInt16x32 - OpSubSaturatedMaskedUint8x16 - OpSubSaturatedMaskedUint8x32 - OpSubSaturatedMaskedUint8x64 - OpSubSaturatedMaskedUint16x8 - OpSubSaturatedMaskedUint16x16 - OpSubSaturatedMaskedUint16x32 OpSubSaturatedUint8x16 OpSubSaturatedUint8x32 OpSubSaturatedUint8x64 @@ -6262,18 +5536,6 @@ const ( OpXorInt64x2 OpXorInt64x4 OpXorInt64x8 - OpXorMaskedInt32x4 - OpXorMaskedInt32x8 - OpXorMaskedInt32x16 - OpXorMaskedInt64x2 - OpXorMaskedInt64x4 - OpXorMaskedInt64x8 - OpXorMaskedUint32x4 - OpXorMaskedUint32x8 - OpXorMaskedUint32x16 - OpXorMaskedUint64x2 - OpXorMaskedUint64x4 - OpXorMaskedUint64x8 OpXorUint8x16 OpXorUint8x32 OpXorUint8x64 @@ -6298,57 +5560,27 @@ const ( OpCeilScaledFloat64x2 OpCeilScaledFloat64x4 OpCeilScaledFloat64x8 - 
OpCeilScaledMaskedFloat32x4 - OpCeilScaledMaskedFloat32x8 - OpCeilScaledMaskedFloat32x16 - OpCeilScaledMaskedFloat64x2 - OpCeilScaledMaskedFloat64x4 - OpCeilScaledMaskedFloat64x8 OpCeilScaledResidueFloat32x4 OpCeilScaledResidueFloat32x8 OpCeilScaledResidueFloat32x16 OpCeilScaledResidueFloat64x2 OpCeilScaledResidueFloat64x4 OpCeilScaledResidueFloat64x8 - OpCeilScaledResidueMaskedFloat32x4 - OpCeilScaledResidueMaskedFloat32x8 - OpCeilScaledResidueMaskedFloat32x16 - OpCeilScaledResidueMaskedFloat64x2 - OpCeilScaledResidueMaskedFloat64x4 - OpCeilScaledResidueMaskedFloat64x8 OpFloorScaledFloat32x4 OpFloorScaledFloat32x8 OpFloorScaledFloat32x16 OpFloorScaledFloat64x2 OpFloorScaledFloat64x4 OpFloorScaledFloat64x8 - OpFloorScaledMaskedFloat32x4 - OpFloorScaledMaskedFloat32x8 - OpFloorScaledMaskedFloat32x16 - OpFloorScaledMaskedFloat64x2 - OpFloorScaledMaskedFloat64x4 - OpFloorScaledMaskedFloat64x8 OpFloorScaledResidueFloat32x4 OpFloorScaledResidueFloat32x8 OpFloorScaledResidueFloat32x16 OpFloorScaledResidueFloat64x2 OpFloorScaledResidueFloat64x4 OpFloorScaledResidueFloat64x8 - OpFloorScaledResidueMaskedFloat32x4 - OpFloorScaledResidueMaskedFloat32x8 - OpFloorScaledResidueMaskedFloat32x16 - OpFloorScaledResidueMaskedFloat64x2 - OpFloorScaledResidueMaskedFloat64x4 - OpFloorScaledResidueMaskedFloat64x8 - OpGaloisFieldAffineTransformInverseMaskedUint8x16 - OpGaloisFieldAffineTransformInverseMaskedUint8x32 - OpGaloisFieldAffineTransformInverseMaskedUint8x64 OpGaloisFieldAffineTransformInverseUint8x16 OpGaloisFieldAffineTransformInverseUint8x32 OpGaloisFieldAffineTransformInverseUint8x64 - OpGaloisFieldAffineTransformMaskedUint8x16 - OpGaloisFieldAffineTransformMaskedUint8x32 - OpGaloisFieldAffineTransformMaskedUint8x64 OpGaloisFieldAffineTransformUint8x16 OpGaloisFieldAffineTransformUint8x32 OpGaloisFieldAffineTransformUint8x64 @@ -6368,18 +5600,6 @@ const ( OpRotateAllLeftInt64x2 OpRotateAllLeftInt64x4 OpRotateAllLeftInt64x8 - OpRotateAllLeftMaskedInt32x4 - OpRotateAllLeftMaskedInt32x8 - OpRotateAllLeftMaskedInt32x16 - OpRotateAllLeftMaskedInt64x2 - OpRotateAllLeftMaskedInt64x4 - OpRotateAllLeftMaskedInt64x8 - OpRotateAllLeftMaskedUint32x4 - OpRotateAllLeftMaskedUint32x8 - OpRotateAllLeftMaskedUint32x16 - OpRotateAllLeftMaskedUint64x2 - OpRotateAllLeftMaskedUint64x4 - OpRotateAllLeftMaskedUint64x8 OpRotateAllLeftUint32x4 OpRotateAllLeftUint32x8 OpRotateAllLeftUint32x16 @@ -6392,18 +5612,6 @@ const ( OpRotateAllRightInt64x2 OpRotateAllRightInt64x4 OpRotateAllRightInt64x8 - OpRotateAllRightMaskedInt32x4 - OpRotateAllRightMaskedInt32x8 - OpRotateAllRightMaskedInt32x16 - OpRotateAllRightMaskedInt64x2 - OpRotateAllRightMaskedInt64x4 - OpRotateAllRightMaskedInt64x8 - OpRotateAllRightMaskedUint32x4 - OpRotateAllRightMaskedUint32x8 - OpRotateAllRightMaskedUint32x16 - OpRotateAllRightMaskedUint64x2 - OpRotateAllRightMaskedUint64x4 - OpRotateAllRightMaskedUint64x8 OpRotateAllRightUint32x4 OpRotateAllRightUint32x8 OpRotateAllRightUint32x16 @@ -6416,24 +5624,12 @@ const ( OpRoundToEvenScaledFloat64x2 OpRoundToEvenScaledFloat64x4 OpRoundToEvenScaledFloat64x8 - OpRoundToEvenScaledMaskedFloat32x4 - OpRoundToEvenScaledMaskedFloat32x8 - OpRoundToEvenScaledMaskedFloat32x16 - OpRoundToEvenScaledMaskedFloat64x2 - OpRoundToEvenScaledMaskedFloat64x4 - OpRoundToEvenScaledMaskedFloat64x8 OpRoundToEvenScaledResidueFloat32x4 OpRoundToEvenScaledResidueFloat32x8 OpRoundToEvenScaledResidueFloat32x16 OpRoundToEvenScaledResidueFloat64x2 OpRoundToEvenScaledResidueFloat64x4 OpRoundToEvenScaledResidueFloat64x8 - 
OpRoundToEvenScaledResidueMaskedFloat32x4 - OpRoundToEvenScaledResidueMaskedFloat32x8 - OpRoundToEvenScaledResidueMaskedFloat32x16 - OpRoundToEvenScaledResidueMaskedFloat64x2 - OpRoundToEvenScaledResidueMaskedFloat64x4 - OpRoundToEvenScaledResidueMaskedFloat64x8 OpSetElemFloat32x4 OpSetElemFloat64x2 OpSetElemInt8x16 @@ -6453,24 +5649,6 @@ const ( OpShiftAllLeftConcatInt64x2 OpShiftAllLeftConcatInt64x4 OpShiftAllLeftConcatInt64x8 - OpShiftAllLeftConcatMaskedInt16x8 - OpShiftAllLeftConcatMaskedInt16x16 - OpShiftAllLeftConcatMaskedInt16x32 - OpShiftAllLeftConcatMaskedInt32x4 - OpShiftAllLeftConcatMaskedInt32x8 - OpShiftAllLeftConcatMaskedInt32x16 - OpShiftAllLeftConcatMaskedInt64x2 - OpShiftAllLeftConcatMaskedInt64x4 - OpShiftAllLeftConcatMaskedInt64x8 - OpShiftAllLeftConcatMaskedUint16x8 - OpShiftAllLeftConcatMaskedUint16x16 - OpShiftAllLeftConcatMaskedUint16x32 - OpShiftAllLeftConcatMaskedUint32x4 - OpShiftAllLeftConcatMaskedUint32x8 - OpShiftAllLeftConcatMaskedUint32x16 - OpShiftAllLeftConcatMaskedUint64x2 - OpShiftAllLeftConcatMaskedUint64x4 - OpShiftAllLeftConcatMaskedUint64x8 OpShiftAllLeftConcatUint16x8 OpShiftAllLeftConcatUint16x16 OpShiftAllLeftConcatUint16x32 @@ -6489,24 +5667,6 @@ const ( OpShiftAllRightConcatInt64x2 OpShiftAllRightConcatInt64x4 OpShiftAllRightConcatInt64x8 - OpShiftAllRightConcatMaskedInt16x8 - OpShiftAllRightConcatMaskedInt16x16 - OpShiftAllRightConcatMaskedInt16x32 - OpShiftAllRightConcatMaskedInt32x4 - OpShiftAllRightConcatMaskedInt32x8 - OpShiftAllRightConcatMaskedInt32x16 - OpShiftAllRightConcatMaskedInt64x2 - OpShiftAllRightConcatMaskedInt64x4 - OpShiftAllRightConcatMaskedInt64x8 - OpShiftAllRightConcatMaskedUint16x8 - OpShiftAllRightConcatMaskedUint16x16 - OpShiftAllRightConcatMaskedUint16x32 - OpShiftAllRightConcatMaskedUint32x4 - OpShiftAllRightConcatMaskedUint32x8 - OpShiftAllRightConcatMaskedUint32x16 - OpShiftAllRightConcatMaskedUint64x2 - OpShiftAllRightConcatMaskedUint64x4 - OpShiftAllRightConcatMaskedUint64x8 OpShiftAllRightConcatUint16x8 OpShiftAllRightConcatUint16x16 OpShiftAllRightConcatUint16x32 @@ -6522,24 +5682,12 @@ const ( OpTruncScaledFloat64x2 OpTruncScaledFloat64x4 OpTruncScaledFloat64x8 - OpTruncScaledMaskedFloat32x4 - OpTruncScaledMaskedFloat32x8 - OpTruncScaledMaskedFloat32x16 - OpTruncScaledMaskedFloat64x2 - OpTruncScaledMaskedFloat64x4 - OpTruncScaledMaskedFloat64x8 OpTruncScaledResidueFloat32x4 OpTruncScaledResidueFloat32x8 OpTruncScaledResidueFloat32x16 OpTruncScaledResidueFloat64x2 OpTruncScaledResidueFloat64x4 OpTruncScaledResidueFloat64x8 - OpTruncScaledResidueMaskedFloat32x4 - OpTruncScaledResidueMaskedFloat32x8 - OpTruncScaledResidueMaskedFloat32x16 - OpTruncScaledResidueMaskedFloat64x2 - OpTruncScaledResidueMaskedFloat64x4 - OpTruncScaledResidueMaskedFloat64x8 ) var opcodeTable = [...]opInfo{ @@ -63838,66 +62986,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "AbsMaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt8x32", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt8x64", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt16x32", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt64x2", - argLen: 2, - generic: 
true, - }, - { - name: "AbsMaskedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt64x8", - argLen: 2, - generic: true, - }, { name: "AddDotProdPairsSaturatedInt32x4", argLen: 3, @@ -63913,21 +63001,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "AddDotProdPairsSaturatedMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdPairsSaturatedMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdPairsSaturatedMaskedInt32x16", - argLen: 4, - generic: true, - }, { name: "AddDotProdQuadrupleInt32x4", argLen: 3, @@ -63943,21 +63016,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "AddDotProdQuadrupleMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleMaskedInt32x16", - argLen: 4, - generic: true, - }, { name: "AddDotProdQuadrupleSaturatedInt32x4", argLen: 3, @@ -63973,21 +63031,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", - argLen: 4, - generic: true, - }, { name: "AddFloat32x4", argLen: 2, @@ -64096,186 +63139,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AddMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: 
"AddMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AddPairsFloat32x4", argLen: 2, @@ -64382,78 +63245,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AddSaturatedMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AddSaturatedUint8x16", argLen: 2, @@ -64654,78 +63445,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AndMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AndNotInt8x16", argLen: 2, @@ -64786,66 +63505,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "AndNotMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: 
"AndNotMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "AndNotUint8x16", argLen: 2, @@ -64978,42 +63637,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AverageMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AverageUint8x16", argLen: 2, @@ -65080,56 +63703,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "Broadcast128MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint64x2", - argLen: 2, - generic: true, - }, { name: "Broadcast128Uint8x16", argLen: 1, @@ -65180,56 +63753,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "Broadcast256MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint64x2", - argLen: 2, - generic: true, - }, { name: "Broadcast256Uint8x16", argLen: 1, @@ -65280,56 +63803,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "Broadcast512MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedInt32x4", - argLen: 2, - generic: true, - }, - { - 
name: "Broadcast512MaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint64x2", - argLen: 2, - generic: true, - }, { name: "Broadcast512Uint8x16", argLen: 1, @@ -65535,21 +64008,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ConvertToInt32MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt32MaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt32MaskedFloat32x16", - argLen: 2, - generic: true, - }, { name: "ConvertToUint32Float32x4", argLen: 1, @@ -65565,21 +64023,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ConvertToUint32MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint32MaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint32MaskedFloat32x16", - argLen: 2, - generic: true, - }, { name: "CopySignInt8x16", argLen: 2, @@ -65640,36 +64083,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "DivMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat64x8", - argLen: 3, - generic: true, - }, { name: "DotProdPairsInt16x8", argLen: 2, @@ -65685,36 +64098,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "DotProdPairsMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsSaturatedMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsSaturatedMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsSaturatedMaskedUint8x64", - argLen: 3, - generic: true, - }, { name: "DotProdPairsSaturatedUint8x16", argLen: 2, @@ -65838,186 +64221,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "EqualMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt32x4", - 
argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "EqualUint8x16", argLen: 2, @@ -66260,21 +64463,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "GaloisFieldMulMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldMulMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldMulMaskedUint8x64", - argLen: 3, - generic: true, - }, { name: "GaloisFieldMulUint8x16", argLen: 2, @@ -66540,156 +64728,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GreaterEqualMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint8x16", - argLen: 3, - generic: true, - }, - { 
- name: "GreaterEqualMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "GreaterEqualUint8x64", argLen: 2, @@ -66800,156 +64838,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GreaterMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "GreaterUint8x64", argLen: 2, @@ -67006,42 +64894,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "IsNanMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - 
name: "IsNanMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "LessEqualFloat32x4", argLen: 2, @@ -67092,156 +64944,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessEqualMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "LessEqualUint8x64", argLen: 2, @@ -67312,156 +65014,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt16x32", - argLen: 3, - 
generic: true, - }, - { - name: "LessMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "LessUint8x64", argLen: 2, @@ -67590,186 +65142,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MaxMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: 
"MaxMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MaxUint8x16", argLen: 2, @@ -67950,186 +65322,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MinMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MinUint8x16", argLen: 2, @@ -68232,36 +65424,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "MulAddMaskedFloat32x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat32x8", - 
argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat64x8", - argLen: 4, - generic: true, - }, { name: "MulAddSubFloat32x4", argLen: 3, @@ -68292,36 +65454,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "MulAddSubMaskedFloat32x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat32x8", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat64x8", - argLen: 4, - generic: true, - }, { name: "MulEvenWidenInt32x4", argLen: 2, @@ -68400,42 +65532,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MulHighMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MulHighUint16x8", argLen: 2, @@ -68508,150 +65604,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MulMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint32x16", - argLen: 3, 
- commutative: true, - generic: true, - }, - { - name: "MulMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MulSubAddFloat32x4", argLen: 3, @@ -68682,36 +65634,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "MulSubAddMaskedFloat32x4", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat32x8", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat64x8", - argLen: 4, - generic: true, - }, { name: "MulUint16x8", argLen: 2, @@ -68826,186 +65748,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "NotEqualMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, 
- { - name: "NotEqualMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "NotEqualUint8x64", argLen: 2, @@ -69090,126 +65832,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "OnesCountMaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt8x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt8x64", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt16x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt64x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint8x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint8x64", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint16x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint16x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint32x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint32x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint64x2", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint64x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint64x8", - argLen: 2, - generic: true, - }, { name: "OnesCountUint8x16", argLen: 1, @@ -69342,78 +65964,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "OrMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "OrUint8x16", argLen: 2, @@ -69576,156 +66126,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "Permute2MaskedFloat32x4", - argLen: 4, - generic: true, - }, 
- { - name: "Permute2MaskedFloat32x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat64x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt8x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt8x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt8x64", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt16x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt16x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt16x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt32x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt64x2", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt64x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt64x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint8x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint8x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint8x64", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint16x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint16x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint16x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint32x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint32x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint32x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint64x2", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint64x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint64x8", - argLen: 4, - generic: true, - }, { name: "Permute2Uint8x16", argLen: 3, @@ -69856,126 +66256,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "PermuteMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: 
"PermuteMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "PermuteUint8x16", argLen: 2, @@ -70056,36 +66336,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ReciprocalMaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat32x16", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat64x8", - argLen: 2, - generic: true, - }, { name: "ReciprocalSqrtFloat32x4", argLen: 1, @@ -70116,36 +66366,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ReciprocalSqrtMaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat32x16", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat64x8", - argLen: 2, - generic: true, - }, { name: "RotateLeftInt32x4", argLen: 2, @@ -70176,66 +66396,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "RotateLeftMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "RotateLeftUint32x4", argLen: 2, @@ -70296,66 +66456,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "RotateRightMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: 
"RotateRightMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "RotateRightUint32x4", argLen: 2, @@ -70436,36 +66536,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ScaleMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat64x8", - argLen: 3, - generic: true, - }, { name: "SetHiFloat32x8", argLen: 2, @@ -70711,96 +66781,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllLeftMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftAllLeftUint16x8", argLen: 2, @@ -70891,96 +66871,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllRightMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint64x2", - argLen: 3, - 
generic: true, - }, - { - name: "ShiftAllRightMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftAllRightUint16x8", argLen: 2, @@ -71071,96 +66961,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftLeftConcatMaskedInt16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt16x32", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt64x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint16x32", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint64x8", - argLen: 4, - generic: true, - }, { name: "ShiftLeftConcatUint16x8", argLen: 3, @@ -71251,96 +67051,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftLeftMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftLeftUint16x8", argLen: 2, @@ -71431,96 +67141,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightConcatMaskedInt16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt16x32", - argLen: 4, - 
generic: true, - }, - { - name: "ShiftRightConcatMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt64x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint16x32", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint64x8", - argLen: 4, - generic: true, - }, { name: "ShiftRightConcatUint16x8", argLen: 3, @@ -71611,96 +67231,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftRightMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftRightUint16x8", argLen: 2, @@ -71776,36 +67306,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "SqrtMaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat32x16", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat64x8", - argLen: 2, - generic: true, - }, { name: "SubFloat32x4", argLen: 2, @@ -71896,156 +67396,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "SubMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat32x16", - 
argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "SubPairsFloat32x4", argLen: 2, @@ -72146,66 +67496,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "SubSaturatedMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint16x32", - argLen: 3, - generic: true, - }, { name: "SubSaturatedUint8x16", argLen: 2, @@ -72388,78 +67678,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "XorMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt64x8", - argLen: 3, - commutative: 
true, - generic: true, - }, - { - name: "XorMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "XorUint8x16", argLen: 2, @@ -72598,42 +67816,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "CeilScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "CeilScaledResidueFloat32x4", auxType: auxUInt8, @@ -72670,42 +67852,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "CeilScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "FloorScaledFloat32x4", auxType: auxUInt8, @@ -72742,42 +67888,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "FloorScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "FloorScaledResidueFloat32x4", auxType: auxUInt8, @@ -72814,60 +67924,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "FloorScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: 
"GaloisFieldAffineTransformInverseMaskedUint8x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformInverseMaskedUint8x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformInverseMaskedUint8x64", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: "GaloisFieldAffineTransformInverseUint8x16", auxType: auxUInt8, @@ -72886,24 +67942,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GaloisFieldAffineTransformMaskedUint8x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformMaskedUint8x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformMaskedUint8x64", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: "GaloisFieldAffineTransformUint8x16", auxType: auxUInt8, @@ -73018,78 +68056,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RotateAllLeftMaskedInt32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "RotateAllLeftUint32x4", auxType: auxUInt8, @@ -73162,78 +68128,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RotateAllRightMaskedInt32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "RotateAllRightUint32x4", auxType: 
auxUInt8, @@ -73306,42 +68200,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RoundToEvenScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "RoundToEvenScaledResidueFloat32x4", auxType: auxUInt8, @@ -73378,42 +68236,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RoundToEvenScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "SetElemFloat32x4", auxType: auxUInt8, @@ -73528,114 +68350,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllLeftConcatMaskedInt16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: 
"ShiftAllLeftConcatUint16x8", auxType: auxUInt8, @@ -73744,114 +68458,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllRightConcatMaskedInt16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: "ShiftAllRightConcatUint16x8", auxType: auxUInt8, @@ -73942,42 +68548,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "TruncScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "TruncScaledResidueFloat32x4", auxType: auxUInt8, @@ -74014,42 +68584,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "TruncScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, } func (o Op) Asm() obj.As 
{ return opcodeTable[o].asm } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 69393014c78..87b1e0586d7 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -537,72 +537,36 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSLLD256(v) case OpAMD64VPSLLD512: return rewriteValueAMD64_OpAMD64VPSLLD512(v) - case OpAMD64VPSLLDMasked128: - return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v) - case OpAMD64VPSLLDMasked256: - return rewriteValueAMD64_OpAMD64VPSLLDMasked256(v) - case OpAMD64VPSLLDMasked512: - return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v) case OpAMD64VPSLLQ128: return rewriteValueAMD64_OpAMD64VPSLLQ128(v) case OpAMD64VPSLLQ256: return rewriteValueAMD64_OpAMD64VPSLLQ256(v) case OpAMD64VPSLLQ512: return rewriteValueAMD64_OpAMD64VPSLLQ512(v) - case OpAMD64VPSLLQMasked128: - return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v) - case OpAMD64VPSLLQMasked256: - return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v) - case OpAMD64VPSLLQMasked512: - return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v) case OpAMD64VPSLLW128: return rewriteValueAMD64_OpAMD64VPSLLW128(v) case OpAMD64VPSLLW256: return rewriteValueAMD64_OpAMD64VPSLLW256(v) case OpAMD64VPSLLW512: return rewriteValueAMD64_OpAMD64VPSLLW512(v) - case OpAMD64VPSLLWMasked128: - return rewriteValueAMD64_OpAMD64VPSLLWMasked128(v) - case OpAMD64VPSLLWMasked256: - return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v) - case OpAMD64VPSLLWMasked512: - return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v) case OpAMD64VPSRAD128: return rewriteValueAMD64_OpAMD64VPSRAD128(v) case OpAMD64VPSRAD256: return rewriteValueAMD64_OpAMD64VPSRAD256(v) case OpAMD64VPSRAD512: return rewriteValueAMD64_OpAMD64VPSRAD512(v) - case OpAMD64VPSRADMasked128: - return rewriteValueAMD64_OpAMD64VPSRADMasked128(v) - case OpAMD64VPSRADMasked256: - return rewriteValueAMD64_OpAMD64VPSRADMasked256(v) - case OpAMD64VPSRADMasked512: - return rewriteValueAMD64_OpAMD64VPSRADMasked512(v) case OpAMD64VPSRAQ128: return rewriteValueAMD64_OpAMD64VPSRAQ128(v) case OpAMD64VPSRAQ256: return rewriteValueAMD64_OpAMD64VPSRAQ256(v) case OpAMD64VPSRAQ512: return rewriteValueAMD64_OpAMD64VPSRAQ512(v) - case OpAMD64VPSRAQMasked128: - return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v) - case OpAMD64VPSRAQMasked256: - return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v) - case OpAMD64VPSRAQMasked512: - return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v) case OpAMD64VPSRAW128: return rewriteValueAMD64_OpAMD64VPSRAW128(v) case OpAMD64VPSRAW256: return rewriteValueAMD64_OpAMD64VPSRAW256(v) case OpAMD64VPSRAW512: return rewriteValueAMD64_OpAMD64VPSRAW512(v) - case OpAMD64VPSRAWMasked128: - return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v) - case OpAMD64VPSRAWMasked256: - return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) - case OpAMD64VPSRAWMasked512: - return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) case OpAMD64XADDLlock: return rewriteValueAMD64_OpAMD64XADDLlock(v) case OpAMD64XADDQlock: @@ -667,30 +631,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAbsInt8x64: v.Op = OpAMD64VPABSB512 return true - case OpAbsMaskedInt16x16: - return rewriteValueAMD64_OpAbsMaskedInt16x16(v) - case OpAbsMaskedInt16x32: - return rewriteValueAMD64_OpAbsMaskedInt16x32(v) - case OpAbsMaskedInt16x8: - return rewriteValueAMD64_OpAbsMaskedInt16x8(v) - case OpAbsMaskedInt32x16: - return rewriteValueAMD64_OpAbsMaskedInt32x16(v) - case OpAbsMaskedInt32x4: - return 
rewriteValueAMD64_OpAbsMaskedInt32x4(v) - case OpAbsMaskedInt32x8: - return rewriteValueAMD64_OpAbsMaskedInt32x8(v) - case OpAbsMaskedInt64x2: - return rewriteValueAMD64_OpAbsMaskedInt64x2(v) - case OpAbsMaskedInt64x4: - return rewriteValueAMD64_OpAbsMaskedInt64x4(v) - case OpAbsMaskedInt64x8: - return rewriteValueAMD64_OpAbsMaskedInt64x8(v) - case OpAbsMaskedInt8x16: - return rewriteValueAMD64_OpAbsMaskedInt8x16(v) - case OpAbsMaskedInt8x32: - return rewriteValueAMD64_OpAbsMaskedInt8x32(v) - case OpAbsMaskedInt8x64: - return rewriteValueAMD64_OpAbsMaskedInt8x64(v) case OpAdd16: v.Op = OpAMD64ADDL return true @@ -718,12 +658,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddDotProdPairsSaturatedInt32x8: v.Op = OpAMD64VPDPWSSDS256 return true - case OpAddDotProdPairsSaturatedMaskedInt32x16: - return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v) - case OpAddDotProdPairsSaturatedMaskedInt32x4: - return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v) - case OpAddDotProdPairsSaturatedMaskedInt32x8: - return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v) case OpAddDotProdQuadrupleInt32x16: v.Op = OpAMD64VPDPBUSD512 return true @@ -733,12 +667,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddDotProdQuadrupleInt32x8: v.Op = OpAMD64VPDPBUSD256 return true - case OpAddDotProdQuadrupleMaskedInt32x16: - return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v) - case OpAddDotProdQuadrupleMaskedInt32x4: - return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v) - case OpAddDotProdQuadrupleMaskedInt32x8: - return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v) case OpAddDotProdQuadrupleSaturatedInt32x16: v.Op = OpAMD64VPDPBUSDS512 return true @@ -748,12 +676,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddDotProdQuadrupleSaturatedInt32x8: v.Op = OpAMD64VPDPBUSDS256 return true - case OpAddDotProdQuadrupleSaturatedMaskedInt32x16: - return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v) - case OpAddDotProdQuadrupleSaturatedMaskedInt32x4: - return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v) - case OpAddDotProdQuadrupleSaturatedMaskedInt32x8: - return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v) case OpAddFloat32x16: v.Op = OpAMD64VADDPS512 return true @@ -808,66 +730,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddInt8x64: v.Op = OpAMD64VPADDB512 return true - case OpAddMaskedFloat32x16: - return rewriteValueAMD64_OpAddMaskedFloat32x16(v) - case OpAddMaskedFloat32x4: - return rewriteValueAMD64_OpAddMaskedFloat32x4(v) - case OpAddMaskedFloat32x8: - return rewriteValueAMD64_OpAddMaskedFloat32x8(v) - case OpAddMaskedFloat64x2: - return rewriteValueAMD64_OpAddMaskedFloat64x2(v) - case OpAddMaskedFloat64x4: - return rewriteValueAMD64_OpAddMaskedFloat64x4(v) - case OpAddMaskedFloat64x8: - return rewriteValueAMD64_OpAddMaskedFloat64x8(v) - case OpAddMaskedInt16x16: - return rewriteValueAMD64_OpAddMaskedInt16x16(v) - case OpAddMaskedInt16x32: - return rewriteValueAMD64_OpAddMaskedInt16x32(v) - case OpAddMaskedInt16x8: - return rewriteValueAMD64_OpAddMaskedInt16x8(v) - case OpAddMaskedInt32x16: - return rewriteValueAMD64_OpAddMaskedInt32x16(v) - case OpAddMaskedInt32x4: - return rewriteValueAMD64_OpAddMaskedInt32x4(v) - case OpAddMaskedInt32x8: - return rewriteValueAMD64_OpAddMaskedInt32x8(v) - case OpAddMaskedInt64x2: - return rewriteValueAMD64_OpAddMaskedInt64x2(v) - case OpAddMaskedInt64x4: - return rewriteValueAMD64_OpAddMaskedInt64x4(v) - case OpAddMaskedInt64x8: - 
return rewriteValueAMD64_OpAddMaskedInt64x8(v) - case OpAddMaskedInt8x16: - return rewriteValueAMD64_OpAddMaskedInt8x16(v) - case OpAddMaskedInt8x32: - return rewriteValueAMD64_OpAddMaskedInt8x32(v) - case OpAddMaskedInt8x64: - return rewriteValueAMD64_OpAddMaskedInt8x64(v) - case OpAddMaskedUint16x16: - return rewriteValueAMD64_OpAddMaskedUint16x16(v) - case OpAddMaskedUint16x32: - return rewriteValueAMD64_OpAddMaskedUint16x32(v) - case OpAddMaskedUint16x8: - return rewriteValueAMD64_OpAddMaskedUint16x8(v) - case OpAddMaskedUint32x16: - return rewriteValueAMD64_OpAddMaskedUint32x16(v) - case OpAddMaskedUint32x4: - return rewriteValueAMD64_OpAddMaskedUint32x4(v) - case OpAddMaskedUint32x8: - return rewriteValueAMD64_OpAddMaskedUint32x8(v) - case OpAddMaskedUint64x2: - return rewriteValueAMD64_OpAddMaskedUint64x2(v) - case OpAddMaskedUint64x4: - return rewriteValueAMD64_OpAddMaskedUint64x4(v) - case OpAddMaskedUint64x8: - return rewriteValueAMD64_OpAddMaskedUint64x8(v) - case OpAddMaskedUint8x16: - return rewriteValueAMD64_OpAddMaskedUint8x16(v) - case OpAddMaskedUint8x32: - return rewriteValueAMD64_OpAddMaskedUint8x32(v) - case OpAddMaskedUint8x64: - return rewriteValueAMD64_OpAddMaskedUint8x64(v) case OpAddPairsFloat32x4: v.Op = OpAMD64VHADDPS128 return true @@ -931,30 +793,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddSaturatedInt8x64: v.Op = OpAMD64VPADDSB512 return true - case OpAddSaturatedMaskedInt16x16: - return rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v) - case OpAddSaturatedMaskedInt16x32: - return rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v) - case OpAddSaturatedMaskedInt16x8: - return rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v) - case OpAddSaturatedMaskedInt8x16: - return rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v) - case OpAddSaturatedMaskedInt8x32: - return rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v) - case OpAddSaturatedMaskedInt8x64: - return rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v) - case OpAddSaturatedMaskedUint16x16: - return rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v) - case OpAddSaturatedMaskedUint16x32: - return rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v) - case OpAddSaturatedMaskedUint16x8: - return rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v) - case OpAddSaturatedMaskedUint8x16: - return rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v) - case OpAddSaturatedMaskedUint8x32: - return rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v) - case OpAddSaturatedMaskedUint8x64: - return rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v) case OpAddSaturatedUint16x16: v.Op = OpAMD64VPADDUSW256 return true @@ -1074,30 +912,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAndInt8x64: v.Op = OpAMD64VPANDD512 return true - case OpAndMaskedInt32x16: - return rewriteValueAMD64_OpAndMaskedInt32x16(v) - case OpAndMaskedInt32x4: - return rewriteValueAMD64_OpAndMaskedInt32x4(v) - case OpAndMaskedInt32x8: - return rewriteValueAMD64_OpAndMaskedInt32x8(v) - case OpAndMaskedInt64x2: - return rewriteValueAMD64_OpAndMaskedInt64x2(v) - case OpAndMaskedInt64x4: - return rewriteValueAMD64_OpAndMaskedInt64x4(v) - case OpAndMaskedInt64x8: - return rewriteValueAMD64_OpAndMaskedInt64x8(v) - case OpAndMaskedUint32x16: - return rewriteValueAMD64_OpAndMaskedUint32x16(v) - case OpAndMaskedUint32x4: - return rewriteValueAMD64_OpAndMaskedUint32x4(v) - case OpAndMaskedUint32x8: - return rewriteValueAMD64_OpAndMaskedUint32x8(v) - case OpAndMaskedUint64x2: - return rewriteValueAMD64_OpAndMaskedUint64x2(v) - case OpAndMaskedUint64x4: - return 
rewriteValueAMD64_OpAndMaskedUint64x4(v) - case OpAndMaskedUint64x8: - return rewriteValueAMD64_OpAndMaskedUint64x8(v) case OpAndNotInt16x16: v.Op = OpAMD64VPANDN256 return true @@ -1134,30 +948,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotInt8x64: v.Op = OpAMD64VPANDND512 return true - case OpAndNotMaskedInt32x16: - return rewriteValueAMD64_OpAndNotMaskedInt32x16(v) - case OpAndNotMaskedInt32x4: - return rewriteValueAMD64_OpAndNotMaskedInt32x4(v) - case OpAndNotMaskedInt32x8: - return rewriteValueAMD64_OpAndNotMaskedInt32x8(v) - case OpAndNotMaskedInt64x2: - return rewriteValueAMD64_OpAndNotMaskedInt64x2(v) - case OpAndNotMaskedInt64x4: - return rewriteValueAMD64_OpAndNotMaskedInt64x4(v) - case OpAndNotMaskedInt64x8: - return rewriteValueAMD64_OpAndNotMaskedInt64x8(v) - case OpAndNotMaskedUint32x16: - return rewriteValueAMD64_OpAndNotMaskedUint32x16(v) - case OpAndNotMaskedUint32x4: - return rewriteValueAMD64_OpAndNotMaskedUint32x4(v) - case OpAndNotMaskedUint32x8: - return rewriteValueAMD64_OpAndNotMaskedUint32x8(v) - case OpAndNotMaskedUint64x2: - return rewriteValueAMD64_OpAndNotMaskedUint64x2(v) - case OpAndNotMaskedUint64x4: - return rewriteValueAMD64_OpAndNotMaskedUint64x4(v) - case OpAndNotMaskedUint64x8: - return rewriteValueAMD64_OpAndNotMaskedUint64x8(v) case OpAndNotUint16x16: v.Op = OpAMD64VPANDN256 return true @@ -1276,18 +1066,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAtomicStore8(v) case OpAtomicStorePtrNoWB: return rewriteValueAMD64_OpAtomicStorePtrNoWB(v) - case OpAverageMaskedUint16x16: - return rewriteValueAMD64_OpAverageMaskedUint16x16(v) - case OpAverageMaskedUint16x32: - return rewriteValueAMD64_OpAverageMaskedUint16x32(v) - case OpAverageMaskedUint16x8: - return rewriteValueAMD64_OpAverageMaskedUint16x8(v) - case OpAverageMaskedUint8x16: - return rewriteValueAMD64_OpAverageMaskedUint8x16(v) - case OpAverageMaskedUint8x32: - return rewriteValueAMD64_OpAverageMaskedUint8x32(v) - case OpAverageMaskedUint8x64: - return rewriteValueAMD64_OpAverageMaskedUint8x64(v) case OpAverageUint16x16: v.Op = OpAMD64VPAVGW256 return true @@ -1335,26 +1113,6 @@ func rewriteValueAMD64(v *Value) bool { case OpBroadcast128Int8x16: v.Op = OpAMD64VPBROADCASTB128 return true - case OpBroadcast128MaskedFloat32x4: - return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v) - case OpBroadcast128MaskedFloat64x2: - return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v) - case OpBroadcast128MaskedInt16x8: - return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v) - case OpBroadcast128MaskedInt32x4: - return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v) - case OpBroadcast128MaskedInt64x2: - return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v) - case OpBroadcast128MaskedInt8x16: - return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v) - case OpBroadcast128MaskedUint16x8: - return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v) - case OpBroadcast128MaskedUint32x4: - return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v) - case OpBroadcast128MaskedUint64x2: - return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v) - case OpBroadcast128MaskedUint8x16: - return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v) case OpBroadcast128Uint16x8: v.Op = OpAMD64VPBROADCASTW128 return true @@ -1385,26 +1143,6 @@ func rewriteValueAMD64(v *Value) bool { case OpBroadcast256Int8x16: v.Op = OpAMD64VPBROADCASTB256 return true - case OpBroadcast256MaskedFloat32x4: - return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v) - case OpBroadcast256MaskedFloat64x2: - 
return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v) - case OpBroadcast256MaskedInt16x8: - return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v) - case OpBroadcast256MaskedInt32x4: - return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v) - case OpBroadcast256MaskedInt64x2: - return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v) - case OpBroadcast256MaskedInt8x16: - return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v) - case OpBroadcast256MaskedUint16x8: - return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v) - case OpBroadcast256MaskedUint32x4: - return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v) - case OpBroadcast256MaskedUint64x2: - return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v) - case OpBroadcast256MaskedUint8x16: - return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v) case OpBroadcast256Uint16x8: v.Op = OpAMD64VPBROADCASTW256 return true @@ -1435,26 +1173,6 @@ func rewriteValueAMD64(v *Value) bool { case OpBroadcast512Int8x16: v.Op = OpAMD64VPBROADCASTB512 return true - case OpBroadcast512MaskedFloat32x4: - return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v) - case OpBroadcast512MaskedFloat64x2: - return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v) - case OpBroadcast512MaskedInt16x8: - return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v) - case OpBroadcast512MaskedInt32x4: - return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v) - case OpBroadcast512MaskedInt64x2: - return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v) - case OpBroadcast512MaskedInt8x16: - return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v) - case OpBroadcast512MaskedUint16x8: - return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v) - case OpBroadcast512MaskedUint32x4: - return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v) - case OpBroadcast512MaskedUint64x2: - return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v) - case OpBroadcast512MaskedUint8x16: - return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v) case OpBroadcast512Uint16x8: v.Op = OpAMD64VPBROADCASTW512 return true @@ -1497,18 +1215,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpCeilScaledFloat64x4(v) case OpCeilScaledFloat64x8: return rewriteValueAMD64_OpCeilScaledFloat64x8(v) - case OpCeilScaledMaskedFloat32x16: - return rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v) - case OpCeilScaledMaskedFloat32x4: - return rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v) - case OpCeilScaledMaskedFloat32x8: - return rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v) - case OpCeilScaledMaskedFloat64x2: - return rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v) - case OpCeilScaledMaskedFloat64x4: - return rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v) - case OpCeilScaledMaskedFloat64x8: - return rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v) case OpCeilScaledResidueFloat32x16: return rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v) case OpCeilScaledResidueFloat32x4: @@ -1521,18 +1227,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v) case OpCeilScaledResidueFloat64x8: return rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v) - case OpCeilScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v) - case OpCeilScaledResidueMaskedFloat32x4: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v) - case OpCeilScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v) - case OpCeilScaledResidueMaskedFloat64x2: - return 
rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v) - case OpCeilScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v) - case OpCeilScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v) case OpClosureCall: v.Op = OpAMD64CALLclosure return true @@ -1639,12 +1333,6 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToInt32Float32x8: v.Op = OpAMD64VCVTTPS2DQ256 return true - case OpConvertToInt32MaskedFloat32x16: - return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v) - case OpConvertToInt32MaskedFloat32x4: - return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v) - case OpConvertToInt32MaskedFloat32x8: - return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v) case OpConvertToUint32Float32x16: v.Op = OpAMD64VCVTPS2UDQ512 return true @@ -1654,12 +1342,6 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToUint32Float32x8: v.Op = OpAMD64VCVTPS2UDQ256 return true - case OpConvertToUint32MaskedFloat32x16: - return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v) - case OpConvertToUint32MaskedFloat32x4: - return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v) - case OpConvertToUint32MaskedFloat32x8: - return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v) case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 return true @@ -1818,18 +1500,6 @@ func rewriteValueAMD64(v *Value) bool { case OpDivFloat64x8: v.Op = OpAMD64VDIVPD512 return true - case OpDivMaskedFloat32x16: - return rewriteValueAMD64_OpDivMaskedFloat32x16(v) - case OpDivMaskedFloat32x4: - return rewriteValueAMD64_OpDivMaskedFloat32x4(v) - case OpDivMaskedFloat32x8: - return rewriteValueAMD64_OpDivMaskedFloat32x8(v) - case OpDivMaskedFloat64x2: - return rewriteValueAMD64_OpDivMaskedFloat64x2(v) - case OpDivMaskedFloat64x4: - return rewriteValueAMD64_OpDivMaskedFloat64x4(v) - case OpDivMaskedFloat64x8: - return rewriteValueAMD64_OpDivMaskedFloat64x8(v) case OpDotProdPairsInt16x16: v.Op = OpAMD64VPMADDWD256 return true @@ -1839,18 +1509,6 @@ func rewriteValueAMD64(v *Value) bool { case OpDotProdPairsInt16x8: v.Op = OpAMD64VPMADDWD128 return true - case OpDotProdPairsMaskedInt16x16: - return rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v) - case OpDotProdPairsMaskedInt16x32: - return rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v) - case OpDotProdPairsMaskedInt16x8: - return rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v) - case OpDotProdPairsSaturatedMaskedUint8x16: - return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v) - case OpDotProdPairsSaturatedMaskedUint8x32: - return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v) - case OpDotProdPairsSaturatedMaskedUint8x64: - return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v) case OpDotProdPairsSaturatedUint8x16: v.Op = OpAMD64VPMADDUBSW128 return true @@ -1920,66 +1578,6 @@ func rewriteValueAMD64(v *Value) bool { return true case OpEqualInt8x64: return rewriteValueAMD64_OpEqualInt8x64(v) - case OpEqualMaskedFloat32x16: - return rewriteValueAMD64_OpEqualMaskedFloat32x16(v) - case OpEqualMaskedFloat32x4: - return rewriteValueAMD64_OpEqualMaskedFloat32x4(v) - case OpEqualMaskedFloat32x8: - return rewriteValueAMD64_OpEqualMaskedFloat32x8(v) - case OpEqualMaskedFloat64x2: - return rewriteValueAMD64_OpEqualMaskedFloat64x2(v) - case OpEqualMaskedFloat64x4: - return rewriteValueAMD64_OpEqualMaskedFloat64x4(v) - case OpEqualMaskedFloat64x8: - return rewriteValueAMD64_OpEqualMaskedFloat64x8(v) - case OpEqualMaskedInt16x16: - return 
rewriteValueAMD64_OpEqualMaskedInt16x16(v) - case OpEqualMaskedInt16x32: - return rewriteValueAMD64_OpEqualMaskedInt16x32(v) - case OpEqualMaskedInt16x8: - return rewriteValueAMD64_OpEqualMaskedInt16x8(v) - case OpEqualMaskedInt32x16: - return rewriteValueAMD64_OpEqualMaskedInt32x16(v) - case OpEqualMaskedInt32x4: - return rewriteValueAMD64_OpEqualMaskedInt32x4(v) - case OpEqualMaskedInt32x8: - return rewriteValueAMD64_OpEqualMaskedInt32x8(v) - case OpEqualMaskedInt64x2: - return rewriteValueAMD64_OpEqualMaskedInt64x2(v) - case OpEqualMaskedInt64x4: - return rewriteValueAMD64_OpEqualMaskedInt64x4(v) - case OpEqualMaskedInt64x8: - return rewriteValueAMD64_OpEqualMaskedInt64x8(v) - case OpEqualMaskedInt8x16: - return rewriteValueAMD64_OpEqualMaskedInt8x16(v) - case OpEqualMaskedInt8x32: - return rewriteValueAMD64_OpEqualMaskedInt8x32(v) - case OpEqualMaskedInt8x64: - return rewriteValueAMD64_OpEqualMaskedInt8x64(v) - case OpEqualMaskedUint16x16: - return rewriteValueAMD64_OpEqualMaskedUint16x16(v) - case OpEqualMaskedUint16x32: - return rewriteValueAMD64_OpEqualMaskedUint16x32(v) - case OpEqualMaskedUint16x8: - return rewriteValueAMD64_OpEqualMaskedUint16x8(v) - case OpEqualMaskedUint32x16: - return rewriteValueAMD64_OpEqualMaskedUint32x16(v) - case OpEqualMaskedUint32x4: - return rewriteValueAMD64_OpEqualMaskedUint32x4(v) - case OpEqualMaskedUint32x8: - return rewriteValueAMD64_OpEqualMaskedUint32x8(v) - case OpEqualMaskedUint64x2: - return rewriteValueAMD64_OpEqualMaskedUint64x2(v) - case OpEqualMaskedUint64x4: - return rewriteValueAMD64_OpEqualMaskedUint64x4(v) - case OpEqualMaskedUint64x8: - return rewriteValueAMD64_OpEqualMaskedUint64x8(v) - case OpEqualMaskedUint8x16: - return rewriteValueAMD64_OpEqualMaskedUint8x16(v) - case OpEqualMaskedUint8x32: - return rewriteValueAMD64_OpEqualMaskedUint8x32(v) - case OpEqualMaskedUint8x64: - return rewriteValueAMD64_OpEqualMaskedUint8x64(v) case OpEqualUint16x16: v.Op = OpAMD64VPCMPEQW256 return true @@ -2096,18 +1694,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpFloorScaledFloat64x4(v) case OpFloorScaledFloat64x8: return rewriteValueAMD64_OpFloorScaledFloat64x8(v) - case OpFloorScaledMaskedFloat32x16: - return rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v) - case OpFloorScaledMaskedFloat32x4: - return rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v) - case OpFloorScaledMaskedFloat32x8: - return rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v) - case OpFloorScaledMaskedFloat64x2: - return rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v) - case OpFloorScaledMaskedFloat64x4: - return rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v) - case OpFloorScaledMaskedFloat64x8: - return rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v) case OpFloorScaledResidueFloat32x16: return rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v) case OpFloorScaledResidueFloat32x4: @@ -2120,24 +1706,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v) case OpFloorScaledResidueFloat64x8: return rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v) - case OpFloorScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v) - case OpFloorScaledResidueMaskedFloat32x4: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v) - case OpFloorScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v) - case OpFloorScaledResidueMaskedFloat64x2: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v) - case 
OpFloorScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v) - case OpFloorScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v) - case OpGaloisFieldAffineTransformInverseMaskedUint8x16: - return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v) - case OpGaloisFieldAffineTransformInverseMaskedUint8x32: - return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v) - case OpGaloisFieldAffineTransformInverseMaskedUint8x64: - return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v) case OpGaloisFieldAffineTransformInverseUint8x16: v.Op = OpAMD64VGF2P8AFFINEINVQB128 return true @@ -2147,12 +1715,6 @@ func rewriteValueAMD64(v *Value) bool { case OpGaloisFieldAffineTransformInverseUint8x64: v.Op = OpAMD64VGF2P8AFFINEINVQB512 return true - case OpGaloisFieldAffineTransformMaskedUint8x16: - return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v) - case OpGaloisFieldAffineTransformMaskedUint8x32: - return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v) - case OpGaloisFieldAffineTransformMaskedUint8x64: - return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v) case OpGaloisFieldAffineTransformUint8x16: v.Op = OpAMD64VGF2P8AFFINEQB128 return true @@ -2162,12 +1724,6 @@ func rewriteValueAMD64(v *Value) bool { case OpGaloisFieldAffineTransformUint8x64: v.Op = OpAMD64VGF2P8AFFINEQB512 return true - case OpGaloisFieldMulMaskedUint8x16: - return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v) - case OpGaloisFieldMulMaskedUint8x32: - return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v) - case OpGaloisFieldMulMaskedUint8x64: - return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v) case OpGaloisFieldMulUint8x16: v.Op = OpAMD64VGF2P8MULB128 return true @@ -2318,66 +1874,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualInt64x8(v) case OpGreaterEqualInt8x64: return rewriteValueAMD64_OpGreaterEqualInt8x64(v) - case OpGreaterEqualMaskedFloat32x16: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v) - case OpGreaterEqualMaskedFloat32x4: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v) - case OpGreaterEqualMaskedFloat32x8: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v) - case OpGreaterEqualMaskedFloat64x2: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v) - case OpGreaterEqualMaskedFloat64x4: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v) - case OpGreaterEqualMaskedFloat64x8: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v) - case OpGreaterEqualMaskedInt16x16: - return rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v) - case OpGreaterEqualMaskedInt16x32: - return rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v) - case OpGreaterEqualMaskedInt16x8: - return rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v) - case OpGreaterEqualMaskedInt32x16: - return rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v) - case OpGreaterEqualMaskedInt32x4: - return rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v) - case OpGreaterEqualMaskedInt32x8: - return rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v) - case OpGreaterEqualMaskedInt64x2: - return rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v) - case OpGreaterEqualMaskedInt64x4: - return rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v) - case OpGreaterEqualMaskedInt64x8: - return rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v) - case OpGreaterEqualMaskedInt8x16: - return 
rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v) - case OpGreaterEqualMaskedInt8x32: - return rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v) - case OpGreaterEqualMaskedInt8x64: - return rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v) - case OpGreaterEqualMaskedUint16x16: - return rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v) - case OpGreaterEqualMaskedUint16x32: - return rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v) - case OpGreaterEqualMaskedUint16x8: - return rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v) - case OpGreaterEqualMaskedUint32x16: - return rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v) - case OpGreaterEqualMaskedUint32x4: - return rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v) - case OpGreaterEqualMaskedUint32x8: - return rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v) - case OpGreaterEqualMaskedUint64x2: - return rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v) - case OpGreaterEqualMaskedUint64x4: - return rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v) - case OpGreaterEqualMaskedUint64x8: - return rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v) - case OpGreaterEqualMaskedUint8x16: - return rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v) - case OpGreaterEqualMaskedUint8x32: - return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v) - case OpGreaterEqualMaskedUint8x64: - return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v) case OpGreaterEqualUint16x32: return rewriteValueAMD64_OpGreaterEqualUint16x32(v) case OpGreaterEqualUint32x16: @@ -2430,66 +1926,6 @@ func rewriteValueAMD64(v *Value) bool { return true case OpGreaterInt8x64: return rewriteValueAMD64_OpGreaterInt8x64(v) - case OpGreaterMaskedFloat32x16: - return rewriteValueAMD64_OpGreaterMaskedFloat32x16(v) - case OpGreaterMaskedFloat32x4: - return rewriteValueAMD64_OpGreaterMaskedFloat32x4(v) - case OpGreaterMaskedFloat32x8: - return rewriteValueAMD64_OpGreaterMaskedFloat32x8(v) - case OpGreaterMaskedFloat64x2: - return rewriteValueAMD64_OpGreaterMaskedFloat64x2(v) - case OpGreaterMaskedFloat64x4: - return rewriteValueAMD64_OpGreaterMaskedFloat64x4(v) - case OpGreaterMaskedFloat64x8: - return rewriteValueAMD64_OpGreaterMaskedFloat64x8(v) - case OpGreaterMaskedInt16x16: - return rewriteValueAMD64_OpGreaterMaskedInt16x16(v) - case OpGreaterMaskedInt16x32: - return rewriteValueAMD64_OpGreaterMaskedInt16x32(v) - case OpGreaterMaskedInt16x8: - return rewriteValueAMD64_OpGreaterMaskedInt16x8(v) - case OpGreaterMaskedInt32x16: - return rewriteValueAMD64_OpGreaterMaskedInt32x16(v) - case OpGreaterMaskedInt32x4: - return rewriteValueAMD64_OpGreaterMaskedInt32x4(v) - case OpGreaterMaskedInt32x8: - return rewriteValueAMD64_OpGreaterMaskedInt32x8(v) - case OpGreaterMaskedInt64x2: - return rewriteValueAMD64_OpGreaterMaskedInt64x2(v) - case OpGreaterMaskedInt64x4: - return rewriteValueAMD64_OpGreaterMaskedInt64x4(v) - case OpGreaterMaskedInt64x8: - return rewriteValueAMD64_OpGreaterMaskedInt64x8(v) - case OpGreaterMaskedInt8x16: - return rewriteValueAMD64_OpGreaterMaskedInt8x16(v) - case OpGreaterMaskedInt8x32: - return rewriteValueAMD64_OpGreaterMaskedInt8x32(v) - case OpGreaterMaskedInt8x64: - return rewriteValueAMD64_OpGreaterMaskedInt8x64(v) - case OpGreaterMaskedUint16x16: - return rewriteValueAMD64_OpGreaterMaskedUint16x16(v) - case OpGreaterMaskedUint16x32: - return rewriteValueAMD64_OpGreaterMaskedUint16x32(v) - case OpGreaterMaskedUint16x8: - return rewriteValueAMD64_OpGreaterMaskedUint16x8(v) - case OpGreaterMaskedUint32x16: - return rewriteValueAMD64_OpGreaterMaskedUint32x16(v) - case 
OpGreaterMaskedUint32x4: - return rewriteValueAMD64_OpGreaterMaskedUint32x4(v) - case OpGreaterMaskedUint32x8: - return rewriteValueAMD64_OpGreaterMaskedUint32x8(v) - case OpGreaterMaskedUint64x2: - return rewriteValueAMD64_OpGreaterMaskedUint64x2(v) - case OpGreaterMaskedUint64x4: - return rewriteValueAMD64_OpGreaterMaskedUint64x4(v) - case OpGreaterMaskedUint64x8: - return rewriteValueAMD64_OpGreaterMaskedUint64x8(v) - case OpGreaterMaskedUint8x16: - return rewriteValueAMD64_OpGreaterMaskedUint8x16(v) - case OpGreaterMaskedUint8x32: - return rewriteValueAMD64_OpGreaterMaskedUint8x32(v) - case OpGreaterMaskedUint8x64: - return rewriteValueAMD64_OpGreaterMaskedUint8x64(v) case OpGreaterUint16x32: return rewriteValueAMD64_OpGreaterUint16x32(v) case OpGreaterUint32x16: @@ -2529,18 +1965,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpIsNanFloat64x4(v) case OpIsNanFloat64x8: return rewriteValueAMD64_OpIsNanFloat64x8(v) - case OpIsNanMaskedFloat32x16: - return rewriteValueAMD64_OpIsNanMaskedFloat32x16(v) - case OpIsNanMaskedFloat32x4: - return rewriteValueAMD64_OpIsNanMaskedFloat32x4(v) - case OpIsNanMaskedFloat32x8: - return rewriteValueAMD64_OpIsNanMaskedFloat32x8(v) - case OpIsNanMaskedFloat64x2: - return rewriteValueAMD64_OpIsNanMaskedFloat64x2(v) - case OpIsNanMaskedFloat64x4: - return rewriteValueAMD64_OpIsNanMaskedFloat64x4(v) - case OpIsNanMaskedFloat64x8: - return rewriteValueAMD64_OpIsNanMaskedFloat64x8(v) case OpIsNonNil: return rewriteValueAMD64_OpIsNonNil(v) case OpIsSliceInBounds: @@ -2605,66 +2029,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualInt64x8(v) case OpLessEqualInt8x64: return rewriteValueAMD64_OpLessEqualInt8x64(v) - case OpLessEqualMaskedFloat32x16: - return rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v) - case OpLessEqualMaskedFloat32x4: - return rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v) - case OpLessEqualMaskedFloat32x8: - return rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v) - case OpLessEqualMaskedFloat64x2: - return rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v) - case OpLessEqualMaskedFloat64x4: - return rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v) - case OpLessEqualMaskedFloat64x8: - return rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v) - case OpLessEqualMaskedInt16x16: - return rewriteValueAMD64_OpLessEqualMaskedInt16x16(v) - case OpLessEqualMaskedInt16x32: - return rewriteValueAMD64_OpLessEqualMaskedInt16x32(v) - case OpLessEqualMaskedInt16x8: - return rewriteValueAMD64_OpLessEqualMaskedInt16x8(v) - case OpLessEqualMaskedInt32x16: - return rewriteValueAMD64_OpLessEqualMaskedInt32x16(v) - case OpLessEqualMaskedInt32x4: - return rewriteValueAMD64_OpLessEqualMaskedInt32x4(v) - case OpLessEqualMaskedInt32x8: - return rewriteValueAMD64_OpLessEqualMaskedInt32x8(v) - case OpLessEqualMaskedInt64x2: - return rewriteValueAMD64_OpLessEqualMaskedInt64x2(v) - case OpLessEqualMaskedInt64x4: - return rewriteValueAMD64_OpLessEqualMaskedInt64x4(v) - case OpLessEqualMaskedInt64x8: - return rewriteValueAMD64_OpLessEqualMaskedInt64x8(v) - case OpLessEqualMaskedInt8x16: - return rewriteValueAMD64_OpLessEqualMaskedInt8x16(v) - case OpLessEqualMaskedInt8x32: - return rewriteValueAMD64_OpLessEqualMaskedInt8x32(v) - case OpLessEqualMaskedInt8x64: - return rewriteValueAMD64_OpLessEqualMaskedInt8x64(v) - case OpLessEqualMaskedUint16x16: - return rewriteValueAMD64_OpLessEqualMaskedUint16x16(v) - case OpLessEqualMaskedUint16x32: - return rewriteValueAMD64_OpLessEqualMaskedUint16x32(v) - case 
OpLessEqualMaskedUint16x8: - return rewriteValueAMD64_OpLessEqualMaskedUint16x8(v) - case OpLessEqualMaskedUint32x16: - return rewriteValueAMD64_OpLessEqualMaskedUint32x16(v) - case OpLessEqualMaskedUint32x4: - return rewriteValueAMD64_OpLessEqualMaskedUint32x4(v) - case OpLessEqualMaskedUint32x8: - return rewriteValueAMD64_OpLessEqualMaskedUint32x8(v) - case OpLessEqualMaskedUint64x2: - return rewriteValueAMD64_OpLessEqualMaskedUint64x2(v) - case OpLessEqualMaskedUint64x4: - return rewriteValueAMD64_OpLessEqualMaskedUint64x4(v) - case OpLessEqualMaskedUint64x8: - return rewriteValueAMD64_OpLessEqualMaskedUint64x8(v) - case OpLessEqualMaskedUint8x16: - return rewriteValueAMD64_OpLessEqualMaskedUint8x16(v) - case OpLessEqualMaskedUint8x32: - return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v) - case OpLessEqualMaskedUint8x64: - return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v) case OpLessEqualUint16x32: return rewriteValueAMD64_OpLessEqualUint16x32(v) case OpLessEqualUint32x16: @@ -2693,66 +2057,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessInt64x8(v) case OpLessInt8x64: return rewriteValueAMD64_OpLessInt8x64(v) - case OpLessMaskedFloat32x16: - return rewriteValueAMD64_OpLessMaskedFloat32x16(v) - case OpLessMaskedFloat32x4: - return rewriteValueAMD64_OpLessMaskedFloat32x4(v) - case OpLessMaskedFloat32x8: - return rewriteValueAMD64_OpLessMaskedFloat32x8(v) - case OpLessMaskedFloat64x2: - return rewriteValueAMD64_OpLessMaskedFloat64x2(v) - case OpLessMaskedFloat64x4: - return rewriteValueAMD64_OpLessMaskedFloat64x4(v) - case OpLessMaskedFloat64x8: - return rewriteValueAMD64_OpLessMaskedFloat64x8(v) - case OpLessMaskedInt16x16: - return rewriteValueAMD64_OpLessMaskedInt16x16(v) - case OpLessMaskedInt16x32: - return rewriteValueAMD64_OpLessMaskedInt16x32(v) - case OpLessMaskedInt16x8: - return rewriteValueAMD64_OpLessMaskedInt16x8(v) - case OpLessMaskedInt32x16: - return rewriteValueAMD64_OpLessMaskedInt32x16(v) - case OpLessMaskedInt32x4: - return rewriteValueAMD64_OpLessMaskedInt32x4(v) - case OpLessMaskedInt32x8: - return rewriteValueAMD64_OpLessMaskedInt32x8(v) - case OpLessMaskedInt64x2: - return rewriteValueAMD64_OpLessMaskedInt64x2(v) - case OpLessMaskedInt64x4: - return rewriteValueAMD64_OpLessMaskedInt64x4(v) - case OpLessMaskedInt64x8: - return rewriteValueAMD64_OpLessMaskedInt64x8(v) - case OpLessMaskedInt8x16: - return rewriteValueAMD64_OpLessMaskedInt8x16(v) - case OpLessMaskedInt8x32: - return rewriteValueAMD64_OpLessMaskedInt8x32(v) - case OpLessMaskedInt8x64: - return rewriteValueAMD64_OpLessMaskedInt8x64(v) - case OpLessMaskedUint16x16: - return rewriteValueAMD64_OpLessMaskedUint16x16(v) - case OpLessMaskedUint16x32: - return rewriteValueAMD64_OpLessMaskedUint16x32(v) - case OpLessMaskedUint16x8: - return rewriteValueAMD64_OpLessMaskedUint16x8(v) - case OpLessMaskedUint32x16: - return rewriteValueAMD64_OpLessMaskedUint32x16(v) - case OpLessMaskedUint32x4: - return rewriteValueAMD64_OpLessMaskedUint32x4(v) - case OpLessMaskedUint32x8: - return rewriteValueAMD64_OpLessMaskedUint32x8(v) - case OpLessMaskedUint64x2: - return rewriteValueAMD64_OpLessMaskedUint64x2(v) - case OpLessMaskedUint64x4: - return rewriteValueAMD64_OpLessMaskedUint64x4(v) - case OpLessMaskedUint64x8: - return rewriteValueAMD64_OpLessMaskedUint64x8(v) - case OpLessMaskedUint8x16: - return rewriteValueAMD64_OpLessMaskedUint8x16(v) - case OpLessMaskedUint8x32: - return rewriteValueAMD64_OpLessMaskedUint8x32(v) - case OpLessMaskedUint8x64: - return 
rewriteValueAMD64_OpLessMaskedUint8x64(v) case OpLessUint16x32: return rewriteValueAMD64_OpLessUint16x32(v) case OpLessUint32x16: @@ -2887,66 +2191,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMaxInt8x64: v.Op = OpAMD64VPMAXSB512 return true - case OpMaxMaskedFloat32x16: - return rewriteValueAMD64_OpMaxMaskedFloat32x16(v) - case OpMaxMaskedFloat32x4: - return rewriteValueAMD64_OpMaxMaskedFloat32x4(v) - case OpMaxMaskedFloat32x8: - return rewriteValueAMD64_OpMaxMaskedFloat32x8(v) - case OpMaxMaskedFloat64x2: - return rewriteValueAMD64_OpMaxMaskedFloat64x2(v) - case OpMaxMaskedFloat64x4: - return rewriteValueAMD64_OpMaxMaskedFloat64x4(v) - case OpMaxMaskedFloat64x8: - return rewriteValueAMD64_OpMaxMaskedFloat64x8(v) - case OpMaxMaskedInt16x16: - return rewriteValueAMD64_OpMaxMaskedInt16x16(v) - case OpMaxMaskedInt16x32: - return rewriteValueAMD64_OpMaxMaskedInt16x32(v) - case OpMaxMaskedInt16x8: - return rewriteValueAMD64_OpMaxMaskedInt16x8(v) - case OpMaxMaskedInt32x16: - return rewriteValueAMD64_OpMaxMaskedInt32x16(v) - case OpMaxMaskedInt32x4: - return rewriteValueAMD64_OpMaxMaskedInt32x4(v) - case OpMaxMaskedInt32x8: - return rewriteValueAMD64_OpMaxMaskedInt32x8(v) - case OpMaxMaskedInt64x2: - return rewriteValueAMD64_OpMaxMaskedInt64x2(v) - case OpMaxMaskedInt64x4: - return rewriteValueAMD64_OpMaxMaskedInt64x4(v) - case OpMaxMaskedInt64x8: - return rewriteValueAMD64_OpMaxMaskedInt64x8(v) - case OpMaxMaskedInt8x16: - return rewriteValueAMD64_OpMaxMaskedInt8x16(v) - case OpMaxMaskedInt8x32: - return rewriteValueAMD64_OpMaxMaskedInt8x32(v) - case OpMaxMaskedInt8x64: - return rewriteValueAMD64_OpMaxMaskedInt8x64(v) - case OpMaxMaskedUint16x16: - return rewriteValueAMD64_OpMaxMaskedUint16x16(v) - case OpMaxMaskedUint16x32: - return rewriteValueAMD64_OpMaxMaskedUint16x32(v) - case OpMaxMaskedUint16x8: - return rewriteValueAMD64_OpMaxMaskedUint16x8(v) - case OpMaxMaskedUint32x16: - return rewriteValueAMD64_OpMaxMaskedUint32x16(v) - case OpMaxMaskedUint32x4: - return rewriteValueAMD64_OpMaxMaskedUint32x4(v) - case OpMaxMaskedUint32x8: - return rewriteValueAMD64_OpMaxMaskedUint32x8(v) - case OpMaxMaskedUint64x2: - return rewriteValueAMD64_OpMaxMaskedUint64x2(v) - case OpMaxMaskedUint64x4: - return rewriteValueAMD64_OpMaxMaskedUint64x4(v) - case OpMaxMaskedUint64x8: - return rewriteValueAMD64_OpMaxMaskedUint64x8(v) - case OpMaxMaskedUint8x16: - return rewriteValueAMD64_OpMaxMaskedUint8x16(v) - case OpMaxMaskedUint8x32: - return rewriteValueAMD64_OpMaxMaskedUint8x32(v) - case OpMaxMaskedUint8x64: - return rewriteValueAMD64_OpMaxMaskedUint8x64(v) case OpMaxUint16x16: v.Op = OpAMD64VPMAXUW256 return true @@ -3041,66 +2285,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMinInt8x64: v.Op = OpAMD64VPMINSB512 return true - case OpMinMaskedFloat32x16: - return rewriteValueAMD64_OpMinMaskedFloat32x16(v) - case OpMinMaskedFloat32x4: - return rewriteValueAMD64_OpMinMaskedFloat32x4(v) - case OpMinMaskedFloat32x8: - return rewriteValueAMD64_OpMinMaskedFloat32x8(v) - case OpMinMaskedFloat64x2: - return rewriteValueAMD64_OpMinMaskedFloat64x2(v) - case OpMinMaskedFloat64x4: - return rewriteValueAMD64_OpMinMaskedFloat64x4(v) - case OpMinMaskedFloat64x8: - return rewriteValueAMD64_OpMinMaskedFloat64x8(v) - case OpMinMaskedInt16x16: - return rewriteValueAMD64_OpMinMaskedInt16x16(v) - case OpMinMaskedInt16x32: - return rewriteValueAMD64_OpMinMaskedInt16x32(v) - case OpMinMaskedInt16x8: - return rewriteValueAMD64_OpMinMaskedInt16x8(v) - case OpMinMaskedInt32x16: - return 
rewriteValueAMD64_OpMinMaskedInt32x16(v) - case OpMinMaskedInt32x4: - return rewriteValueAMD64_OpMinMaskedInt32x4(v) - case OpMinMaskedInt32x8: - return rewriteValueAMD64_OpMinMaskedInt32x8(v) - case OpMinMaskedInt64x2: - return rewriteValueAMD64_OpMinMaskedInt64x2(v) - case OpMinMaskedInt64x4: - return rewriteValueAMD64_OpMinMaskedInt64x4(v) - case OpMinMaskedInt64x8: - return rewriteValueAMD64_OpMinMaskedInt64x8(v) - case OpMinMaskedInt8x16: - return rewriteValueAMD64_OpMinMaskedInt8x16(v) - case OpMinMaskedInt8x32: - return rewriteValueAMD64_OpMinMaskedInt8x32(v) - case OpMinMaskedInt8x64: - return rewriteValueAMD64_OpMinMaskedInt8x64(v) - case OpMinMaskedUint16x16: - return rewriteValueAMD64_OpMinMaskedUint16x16(v) - case OpMinMaskedUint16x32: - return rewriteValueAMD64_OpMinMaskedUint16x32(v) - case OpMinMaskedUint16x8: - return rewriteValueAMD64_OpMinMaskedUint16x8(v) - case OpMinMaskedUint32x16: - return rewriteValueAMD64_OpMinMaskedUint32x16(v) - case OpMinMaskedUint32x4: - return rewriteValueAMD64_OpMinMaskedUint32x4(v) - case OpMinMaskedUint32x8: - return rewriteValueAMD64_OpMinMaskedUint32x8(v) - case OpMinMaskedUint64x2: - return rewriteValueAMD64_OpMinMaskedUint64x2(v) - case OpMinMaskedUint64x4: - return rewriteValueAMD64_OpMinMaskedUint64x4(v) - case OpMinMaskedUint64x8: - return rewriteValueAMD64_OpMinMaskedUint64x8(v) - case OpMinMaskedUint8x16: - return rewriteValueAMD64_OpMinMaskedUint8x16(v) - case OpMinMaskedUint8x32: - return rewriteValueAMD64_OpMinMaskedUint8x32(v) - case OpMinMaskedUint8x64: - return rewriteValueAMD64_OpMinMaskedUint8x64(v) case OpMinUint16x16: v.Op = OpAMD64VPMINUW256 return true @@ -3194,18 +2378,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulAddFloat64x8: v.Op = OpAMD64VFMADD213PD512 return true - case OpMulAddMaskedFloat32x16: - return rewriteValueAMD64_OpMulAddMaskedFloat32x16(v) - case OpMulAddMaskedFloat32x4: - return rewriteValueAMD64_OpMulAddMaskedFloat32x4(v) - case OpMulAddMaskedFloat32x8: - return rewriteValueAMD64_OpMulAddMaskedFloat32x8(v) - case OpMulAddMaskedFloat64x2: - return rewriteValueAMD64_OpMulAddMaskedFloat64x2(v) - case OpMulAddMaskedFloat64x4: - return rewriteValueAMD64_OpMulAddMaskedFloat64x4(v) - case OpMulAddMaskedFloat64x8: - return rewriteValueAMD64_OpMulAddMaskedFloat64x8(v) case OpMulAddSubFloat32x16: v.Op = OpAMD64VFMADDSUB213PS512 return true @@ -3224,18 +2396,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulAddSubFloat64x8: v.Op = OpAMD64VFMADDSUB213PD512 return true - case OpMulAddSubMaskedFloat32x16: - return rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v) - case OpMulAddSubMaskedFloat32x4: - return rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v) - case OpMulAddSubMaskedFloat32x8: - return rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v) - case OpMulAddSubMaskedFloat64x2: - return rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v) - case OpMulAddSubMaskedFloat64x4: - return rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v) - case OpMulAddSubMaskedFloat64x8: - return rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v) case OpMulEvenWidenInt32x4: v.Op = OpAMD64VPMULDQ128 return true @@ -3275,18 +2435,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulHighInt16x8: v.Op = OpAMD64VPMULHW128 return true - case OpMulHighMaskedInt16x16: - return rewriteValueAMD64_OpMulHighMaskedInt16x16(v) - case OpMulHighMaskedInt16x32: - return rewriteValueAMD64_OpMulHighMaskedInt16x32(v) - case OpMulHighMaskedInt16x8: - return rewriteValueAMD64_OpMulHighMaskedInt16x8(v) - case OpMulHighMaskedUint16x16: - return 
rewriteValueAMD64_OpMulHighMaskedUint16x16(v) - case OpMulHighMaskedUint16x32: - return rewriteValueAMD64_OpMulHighMaskedUint16x32(v) - case OpMulHighMaskedUint16x8: - return rewriteValueAMD64_OpMulHighMaskedUint16x8(v) case OpMulHighUint16x16: v.Op = OpAMD64VPMULHUW256 return true @@ -3323,54 +2471,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulInt64x8: v.Op = OpAMD64VPMULLQ512 return true - case OpMulMaskedFloat32x16: - return rewriteValueAMD64_OpMulMaskedFloat32x16(v) - case OpMulMaskedFloat32x4: - return rewriteValueAMD64_OpMulMaskedFloat32x4(v) - case OpMulMaskedFloat32x8: - return rewriteValueAMD64_OpMulMaskedFloat32x8(v) - case OpMulMaskedFloat64x2: - return rewriteValueAMD64_OpMulMaskedFloat64x2(v) - case OpMulMaskedFloat64x4: - return rewriteValueAMD64_OpMulMaskedFloat64x4(v) - case OpMulMaskedFloat64x8: - return rewriteValueAMD64_OpMulMaskedFloat64x8(v) - case OpMulMaskedInt16x16: - return rewriteValueAMD64_OpMulMaskedInt16x16(v) - case OpMulMaskedInt16x32: - return rewriteValueAMD64_OpMulMaskedInt16x32(v) - case OpMulMaskedInt16x8: - return rewriteValueAMD64_OpMulMaskedInt16x8(v) - case OpMulMaskedInt32x16: - return rewriteValueAMD64_OpMulMaskedInt32x16(v) - case OpMulMaskedInt32x4: - return rewriteValueAMD64_OpMulMaskedInt32x4(v) - case OpMulMaskedInt32x8: - return rewriteValueAMD64_OpMulMaskedInt32x8(v) - case OpMulMaskedInt64x2: - return rewriteValueAMD64_OpMulMaskedInt64x2(v) - case OpMulMaskedInt64x4: - return rewriteValueAMD64_OpMulMaskedInt64x4(v) - case OpMulMaskedInt64x8: - return rewriteValueAMD64_OpMulMaskedInt64x8(v) - case OpMulMaskedUint16x16: - return rewriteValueAMD64_OpMulMaskedUint16x16(v) - case OpMulMaskedUint16x32: - return rewriteValueAMD64_OpMulMaskedUint16x32(v) - case OpMulMaskedUint16x8: - return rewriteValueAMD64_OpMulMaskedUint16x8(v) - case OpMulMaskedUint32x16: - return rewriteValueAMD64_OpMulMaskedUint32x16(v) - case OpMulMaskedUint32x4: - return rewriteValueAMD64_OpMulMaskedUint32x4(v) - case OpMulMaskedUint32x8: - return rewriteValueAMD64_OpMulMaskedUint32x8(v) - case OpMulMaskedUint64x2: - return rewriteValueAMD64_OpMulMaskedUint64x2(v) - case OpMulMaskedUint64x4: - return rewriteValueAMD64_OpMulMaskedUint64x4(v) - case OpMulMaskedUint64x8: - return rewriteValueAMD64_OpMulMaskedUint64x8(v) case OpMulSubAddFloat32x16: v.Op = OpAMD64VFMSUBADD213PS512 return true @@ -3389,18 +2489,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulSubAddFloat64x8: v.Op = OpAMD64VFMSUBADD213PD512 return true - case OpMulSubAddMaskedFloat32x16: - return rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v) - case OpMulSubAddMaskedFloat32x4: - return rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v) - case OpMulSubAddMaskedFloat32x8: - return rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v) - case OpMulSubAddMaskedFloat64x2: - return rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v) - case OpMulSubAddMaskedFloat64x4: - return rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v) - case OpMulSubAddMaskedFloat64x8: - return rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v) case OpMulUint16x16: v.Op = OpAMD64VPMULLW256 return true @@ -3485,66 +2573,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualInt64x8(v) case OpNotEqualInt8x64: return rewriteValueAMD64_OpNotEqualInt8x64(v) - case OpNotEqualMaskedFloat32x16: - return rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v) - case OpNotEqualMaskedFloat32x4: - return rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v) - case OpNotEqualMaskedFloat32x8: - return rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v) - 
case OpNotEqualMaskedFloat64x2: - return rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v) - case OpNotEqualMaskedFloat64x4: - return rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v) - case OpNotEqualMaskedFloat64x8: - return rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v) - case OpNotEqualMaskedInt16x16: - return rewriteValueAMD64_OpNotEqualMaskedInt16x16(v) - case OpNotEqualMaskedInt16x32: - return rewriteValueAMD64_OpNotEqualMaskedInt16x32(v) - case OpNotEqualMaskedInt16x8: - return rewriteValueAMD64_OpNotEqualMaskedInt16x8(v) - case OpNotEqualMaskedInt32x16: - return rewriteValueAMD64_OpNotEqualMaskedInt32x16(v) - case OpNotEqualMaskedInt32x4: - return rewriteValueAMD64_OpNotEqualMaskedInt32x4(v) - case OpNotEqualMaskedInt32x8: - return rewriteValueAMD64_OpNotEqualMaskedInt32x8(v) - case OpNotEqualMaskedInt64x2: - return rewriteValueAMD64_OpNotEqualMaskedInt64x2(v) - case OpNotEqualMaskedInt64x4: - return rewriteValueAMD64_OpNotEqualMaskedInt64x4(v) - case OpNotEqualMaskedInt64x8: - return rewriteValueAMD64_OpNotEqualMaskedInt64x8(v) - case OpNotEqualMaskedInt8x16: - return rewriteValueAMD64_OpNotEqualMaskedInt8x16(v) - case OpNotEqualMaskedInt8x32: - return rewriteValueAMD64_OpNotEqualMaskedInt8x32(v) - case OpNotEqualMaskedInt8x64: - return rewriteValueAMD64_OpNotEqualMaskedInt8x64(v) - case OpNotEqualMaskedUint16x16: - return rewriteValueAMD64_OpNotEqualMaskedUint16x16(v) - case OpNotEqualMaskedUint16x32: - return rewriteValueAMD64_OpNotEqualMaskedUint16x32(v) - case OpNotEqualMaskedUint16x8: - return rewriteValueAMD64_OpNotEqualMaskedUint16x8(v) - case OpNotEqualMaskedUint32x16: - return rewriteValueAMD64_OpNotEqualMaskedUint32x16(v) - case OpNotEqualMaskedUint32x4: - return rewriteValueAMD64_OpNotEqualMaskedUint32x4(v) - case OpNotEqualMaskedUint32x8: - return rewriteValueAMD64_OpNotEqualMaskedUint32x8(v) - case OpNotEqualMaskedUint64x2: - return rewriteValueAMD64_OpNotEqualMaskedUint64x2(v) - case OpNotEqualMaskedUint64x4: - return rewriteValueAMD64_OpNotEqualMaskedUint64x4(v) - case OpNotEqualMaskedUint64x8: - return rewriteValueAMD64_OpNotEqualMaskedUint64x8(v) - case OpNotEqualMaskedUint8x16: - return rewriteValueAMD64_OpNotEqualMaskedUint8x16(v) - case OpNotEqualMaskedUint8x32: - return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v) - case OpNotEqualMaskedUint8x64: - return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v) case OpNotEqualUint16x32: return rewriteValueAMD64_OpNotEqualUint16x32(v) case OpNotEqualUint32x16: @@ -3591,54 +2619,6 @@ func rewriteValueAMD64(v *Value) bool { case OpOnesCountInt8x64: v.Op = OpAMD64VPOPCNTB512 return true - case OpOnesCountMaskedInt16x16: - return rewriteValueAMD64_OpOnesCountMaskedInt16x16(v) - case OpOnesCountMaskedInt16x32: - return rewriteValueAMD64_OpOnesCountMaskedInt16x32(v) - case OpOnesCountMaskedInt16x8: - return rewriteValueAMD64_OpOnesCountMaskedInt16x8(v) - case OpOnesCountMaskedInt32x16: - return rewriteValueAMD64_OpOnesCountMaskedInt32x16(v) - case OpOnesCountMaskedInt32x4: - return rewriteValueAMD64_OpOnesCountMaskedInt32x4(v) - case OpOnesCountMaskedInt32x8: - return rewriteValueAMD64_OpOnesCountMaskedInt32x8(v) - case OpOnesCountMaskedInt64x2: - return rewriteValueAMD64_OpOnesCountMaskedInt64x2(v) - case OpOnesCountMaskedInt64x4: - return rewriteValueAMD64_OpOnesCountMaskedInt64x4(v) - case OpOnesCountMaskedInt64x8: - return rewriteValueAMD64_OpOnesCountMaskedInt64x8(v) - case OpOnesCountMaskedInt8x16: - return rewriteValueAMD64_OpOnesCountMaskedInt8x16(v) - case OpOnesCountMaskedInt8x32: - return 
rewriteValueAMD64_OpOnesCountMaskedInt8x32(v) - case OpOnesCountMaskedInt8x64: - return rewriteValueAMD64_OpOnesCountMaskedInt8x64(v) - case OpOnesCountMaskedUint16x16: - return rewriteValueAMD64_OpOnesCountMaskedUint16x16(v) - case OpOnesCountMaskedUint16x32: - return rewriteValueAMD64_OpOnesCountMaskedUint16x32(v) - case OpOnesCountMaskedUint16x8: - return rewriteValueAMD64_OpOnesCountMaskedUint16x8(v) - case OpOnesCountMaskedUint32x16: - return rewriteValueAMD64_OpOnesCountMaskedUint32x16(v) - case OpOnesCountMaskedUint32x4: - return rewriteValueAMD64_OpOnesCountMaskedUint32x4(v) - case OpOnesCountMaskedUint32x8: - return rewriteValueAMD64_OpOnesCountMaskedUint32x8(v) - case OpOnesCountMaskedUint64x2: - return rewriteValueAMD64_OpOnesCountMaskedUint64x2(v) - case OpOnesCountMaskedUint64x4: - return rewriteValueAMD64_OpOnesCountMaskedUint64x4(v) - case OpOnesCountMaskedUint64x8: - return rewriteValueAMD64_OpOnesCountMaskedUint64x8(v) - case OpOnesCountMaskedUint8x16: - return rewriteValueAMD64_OpOnesCountMaskedUint8x16(v) - case OpOnesCountMaskedUint8x32: - return rewriteValueAMD64_OpOnesCountMaskedUint8x32(v) - case OpOnesCountMaskedUint8x64: - return rewriteValueAMD64_OpOnesCountMaskedUint8x64(v) case OpOnesCountUint16x16: v.Op = OpAMD64VPOPCNTW256 return true @@ -3726,30 +2706,6 @@ func rewriteValueAMD64(v *Value) bool { case OpOrInt8x64: v.Op = OpAMD64VPORD512 return true - case OpOrMaskedInt32x16: - return rewriteValueAMD64_OpOrMaskedInt32x16(v) - case OpOrMaskedInt32x4: - return rewriteValueAMD64_OpOrMaskedInt32x4(v) - case OpOrMaskedInt32x8: - return rewriteValueAMD64_OpOrMaskedInt32x8(v) - case OpOrMaskedInt64x2: - return rewriteValueAMD64_OpOrMaskedInt64x2(v) - case OpOrMaskedInt64x4: - return rewriteValueAMD64_OpOrMaskedInt64x4(v) - case OpOrMaskedInt64x8: - return rewriteValueAMD64_OpOrMaskedInt64x8(v) - case OpOrMaskedUint32x16: - return rewriteValueAMD64_OpOrMaskedUint32x16(v) - case OpOrMaskedUint32x4: - return rewriteValueAMD64_OpOrMaskedUint32x4(v) - case OpOrMaskedUint32x8: - return rewriteValueAMD64_OpOrMaskedUint32x8(v) - case OpOrMaskedUint64x2: - return rewriteValueAMD64_OpOrMaskedUint64x2(v) - case OpOrMaskedUint64x4: - return rewriteValueAMD64_OpOrMaskedUint64x4(v) - case OpOrMaskedUint64x8: - return rewriteValueAMD64_OpOrMaskedUint64x8(v) case OpOrUint16x16: v.Op = OpAMD64VPOR256 return true @@ -3843,66 +2799,6 @@ func rewriteValueAMD64(v *Value) bool { case OpPermute2Int8x64: v.Op = OpAMD64VPERMI2B512 return true - case OpPermute2MaskedFloat32x16: - return rewriteValueAMD64_OpPermute2MaskedFloat32x16(v) - case OpPermute2MaskedFloat32x4: - return rewriteValueAMD64_OpPermute2MaskedFloat32x4(v) - case OpPermute2MaskedFloat32x8: - return rewriteValueAMD64_OpPermute2MaskedFloat32x8(v) - case OpPermute2MaskedFloat64x2: - return rewriteValueAMD64_OpPermute2MaskedFloat64x2(v) - case OpPermute2MaskedFloat64x4: - return rewriteValueAMD64_OpPermute2MaskedFloat64x4(v) - case OpPermute2MaskedFloat64x8: - return rewriteValueAMD64_OpPermute2MaskedFloat64x8(v) - case OpPermute2MaskedInt16x16: - return rewriteValueAMD64_OpPermute2MaskedInt16x16(v) - case OpPermute2MaskedInt16x32: - return rewriteValueAMD64_OpPermute2MaskedInt16x32(v) - case OpPermute2MaskedInt16x8: - return rewriteValueAMD64_OpPermute2MaskedInt16x8(v) - case OpPermute2MaskedInt32x16: - return rewriteValueAMD64_OpPermute2MaskedInt32x16(v) - case OpPermute2MaskedInt32x4: - return rewriteValueAMD64_OpPermute2MaskedInt32x4(v) - case OpPermute2MaskedInt32x8: - return rewriteValueAMD64_OpPermute2MaskedInt32x8(v) - case 
OpPermute2MaskedInt64x2: - return rewriteValueAMD64_OpPermute2MaskedInt64x2(v) - case OpPermute2MaskedInt64x4: - return rewriteValueAMD64_OpPermute2MaskedInt64x4(v) - case OpPermute2MaskedInt64x8: - return rewriteValueAMD64_OpPermute2MaskedInt64x8(v) - case OpPermute2MaskedInt8x16: - return rewriteValueAMD64_OpPermute2MaskedInt8x16(v) - case OpPermute2MaskedInt8x32: - return rewriteValueAMD64_OpPermute2MaskedInt8x32(v) - case OpPermute2MaskedInt8x64: - return rewriteValueAMD64_OpPermute2MaskedInt8x64(v) - case OpPermute2MaskedUint16x16: - return rewriteValueAMD64_OpPermute2MaskedUint16x16(v) - case OpPermute2MaskedUint16x32: - return rewriteValueAMD64_OpPermute2MaskedUint16x32(v) - case OpPermute2MaskedUint16x8: - return rewriteValueAMD64_OpPermute2MaskedUint16x8(v) - case OpPermute2MaskedUint32x16: - return rewriteValueAMD64_OpPermute2MaskedUint32x16(v) - case OpPermute2MaskedUint32x4: - return rewriteValueAMD64_OpPermute2MaskedUint32x4(v) - case OpPermute2MaskedUint32x8: - return rewriteValueAMD64_OpPermute2MaskedUint32x8(v) - case OpPermute2MaskedUint64x2: - return rewriteValueAMD64_OpPermute2MaskedUint64x2(v) - case OpPermute2MaskedUint64x4: - return rewriteValueAMD64_OpPermute2MaskedUint64x4(v) - case OpPermute2MaskedUint64x8: - return rewriteValueAMD64_OpPermute2MaskedUint64x8(v) - case OpPermute2MaskedUint8x16: - return rewriteValueAMD64_OpPermute2MaskedUint8x16(v) - case OpPermute2MaskedUint8x32: - return rewriteValueAMD64_OpPermute2MaskedUint8x32(v) - case OpPermute2MaskedUint8x64: - return rewriteValueAMD64_OpPermute2MaskedUint8x64(v) case OpPermute2Uint16x16: v.Op = OpAMD64VPERMI2W256 return true @@ -3981,54 +2877,6 @@ func rewriteValueAMD64(v *Value) bool { case OpPermuteInt8x64: v.Op = OpAMD64VPERMB512 return true - case OpPermuteMaskedFloat32x16: - return rewriteValueAMD64_OpPermuteMaskedFloat32x16(v) - case OpPermuteMaskedFloat32x8: - return rewriteValueAMD64_OpPermuteMaskedFloat32x8(v) - case OpPermuteMaskedFloat64x4: - return rewriteValueAMD64_OpPermuteMaskedFloat64x4(v) - case OpPermuteMaskedFloat64x8: - return rewriteValueAMD64_OpPermuteMaskedFloat64x8(v) - case OpPermuteMaskedInt16x16: - return rewriteValueAMD64_OpPermuteMaskedInt16x16(v) - case OpPermuteMaskedInt16x32: - return rewriteValueAMD64_OpPermuteMaskedInt16x32(v) - case OpPermuteMaskedInt16x8: - return rewriteValueAMD64_OpPermuteMaskedInt16x8(v) - case OpPermuteMaskedInt32x16: - return rewriteValueAMD64_OpPermuteMaskedInt32x16(v) - case OpPermuteMaskedInt32x8: - return rewriteValueAMD64_OpPermuteMaskedInt32x8(v) - case OpPermuteMaskedInt64x4: - return rewriteValueAMD64_OpPermuteMaskedInt64x4(v) - case OpPermuteMaskedInt64x8: - return rewriteValueAMD64_OpPermuteMaskedInt64x8(v) - case OpPermuteMaskedInt8x16: - return rewriteValueAMD64_OpPermuteMaskedInt8x16(v) - case OpPermuteMaskedInt8x32: - return rewriteValueAMD64_OpPermuteMaskedInt8x32(v) - case OpPermuteMaskedInt8x64: - return rewriteValueAMD64_OpPermuteMaskedInt8x64(v) - case OpPermuteMaskedUint16x16: - return rewriteValueAMD64_OpPermuteMaskedUint16x16(v) - case OpPermuteMaskedUint16x32: - return rewriteValueAMD64_OpPermuteMaskedUint16x32(v) - case OpPermuteMaskedUint16x8: - return rewriteValueAMD64_OpPermuteMaskedUint16x8(v) - case OpPermuteMaskedUint32x16: - return rewriteValueAMD64_OpPermuteMaskedUint32x16(v) - case OpPermuteMaskedUint32x8: - return rewriteValueAMD64_OpPermuteMaskedUint32x8(v) - case OpPermuteMaskedUint64x4: - return rewriteValueAMD64_OpPermuteMaskedUint64x4(v) - case OpPermuteMaskedUint64x8: - return 
rewriteValueAMD64_OpPermuteMaskedUint64x8(v) - case OpPermuteMaskedUint8x16: - return rewriteValueAMD64_OpPermuteMaskedUint8x16(v) - case OpPermuteMaskedUint8x32: - return rewriteValueAMD64_OpPermuteMaskedUint8x32(v) - case OpPermuteMaskedUint8x64: - return rewriteValueAMD64_OpPermuteMaskedUint8x64(v) case OpPermuteUint16x16: v.Op = OpAMD64VPERMW256 return true @@ -4093,18 +2941,6 @@ func rewriteValueAMD64(v *Value) bool { case OpReciprocalFloat64x8: v.Op = OpAMD64VRCP14PD512 return true - case OpReciprocalMaskedFloat32x16: - return rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v) - case OpReciprocalMaskedFloat32x4: - return rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v) - case OpReciprocalMaskedFloat32x8: - return rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v) - case OpReciprocalMaskedFloat64x2: - return rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v) - case OpReciprocalMaskedFloat64x4: - return rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v) - case OpReciprocalMaskedFloat64x8: - return rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v) case OpReciprocalSqrtFloat32x16: v.Op = OpAMD64VRSQRT14PS512 return true @@ -4123,18 +2959,6 @@ func rewriteValueAMD64(v *Value) bool { case OpReciprocalSqrtFloat64x8: v.Op = OpAMD64VRSQRT14PD512 return true - case OpReciprocalSqrtMaskedFloat32x16: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v) - case OpReciprocalSqrtMaskedFloat32x4: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v) - case OpReciprocalSqrtMaskedFloat32x8: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v) - case OpReciprocalSqrtMaskedFloat64x2: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v) - case OpReciprocalSqrtMaskedFloat64x4: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v) - case OpReciprocalSqrtMaskedFloat64x8: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v) case OpRotateAllLeftInt32x16: v.Op = OpAMD64VPROLD512 return true @@ -4153,30 +2977,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateAllLeftInt64x8: v.Op = OpAMD64VPROLQ512 return true - case OpRotateAllLeftMaskedInt32x16: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v) - case OpRotateAllLeftMaskedInt32x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v) - case OpRotateAllLeftMaskedInt32x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v) - case OpRotateAllLeftMaskedInt64x2: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v) - case OpRotateAllLeftMaskedInt64x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v) - case OpRotateAllLeftMaskedInt64x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v) - case OpRotateAllLeftMaskedUint32x16: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v) - case OpRotateAllLeftMaskedUint32x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v) - case OpRotateAllLeftMaskedUint32x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v) - case OpRotateAllLeftMaskedUint64x2: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v) - case OpRotateAllLeftMaskedUint64x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v) - case OpRotateAllLeftMaskedUint64x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v) case OpRotateAllLeftUint32x16: v.Op = OpAMD64VPROLD512 return true @@ -4213,30 +3013,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateAllRightInt64x8: v.Op = OpAMD64VPRORQ512 return true - case OpRotateAllRightMaskedInt32x16: - return rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v) - case 
OpRotateAllRightMaskedInt32x4: - return rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v) - case OpRotateAllRightMaskedInt32x8: - return rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v) - case OpRotateAllRightMaskedInt64x2: - return rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v) - case OpRotateAllRightMaskedInt64x4: - return rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v) - case OpRotateAllRightMaskedInt64x8: - return rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v) - case OpRotateAllRightMaskedUint32x16: - return rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v) - case OpRotateAllRightMaskedUint32x4: - return rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v) - case OpRotateAllRightMaskedUint32x8: - return rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v) - case OpRotateAllRightMaskedUint64x2: - return rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v) - case OpRotateAllRightMaskedUint64x4: - return rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v) - case OpRotateAllRightMaskedUint64x8: - return rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v) case OpRotateAllRightUint32x16: v.Op = OpAMD64VPRORD512 return true @@ -4285,30 +3061,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateLeftInt64x8: v.Op = OpAMD64VPROLVQ512 return true - case OpRotateLeftMaskedInt32x16: - return rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v) - case OpRotateLeftMaskedInt32x4: - return rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v) - case OpRotateLeftMaskedInt32x8: - return rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v) - case OpRotateLeftMaskedInt64x2: - return rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v) - case OpRotateLeftMaskedInt64x4: - return rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v) - case OpRotateLeftMaskedInt64x8: - return rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v) - case OpRotateLeftMaskedUint32x16: - return rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v) - case OpRotateLeftMaskedUint32x4: - return rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v) - case OpRotateLeftMaskedUint32x8: - return rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v) - case OpRotateLeftMaskedUint64x2: - return rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v) - case OpRotateLeftMaskedUint64x4: - return rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v) - case OpRotateLeftMaskedUint64x8: - return rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v) case OpRotateLeftUint32x16: v.Op = OpAMD64VPROLVD512 return true @@ -4345,30 +3097,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateRightInt64x8: v.Op = OpAMD64VPRORVQ512 return true - case OpRotateRightMaskedInt32x16: - return rewriteValueAMD64_OpRotateRightMaskedInt32x16(v) - case OpRotateRightMaskedInt32x4: - return rewriteValueAMD64_OpRotateRightMaskedInt32x4(v) - case OpRotateRightMaskedInt32x8: - return rewriteValueAMD64_OpRotateRightMaskedInt32x8(v) - case OpRotateRightMaskedInt64x2: - return rewriteValueAMD64_OpRotateRightMaskedInt64x2(v) - case OpRotateRightMaskedInt64x4: - return rewriteValueAMD64_OpRotateRightMaskedInt64x4(v) - case OpRotateRightMaskedInt64x8: - return rewriteValueAMD64_OpRotateRightMaskedInt64x8(v) - case OpRotateRightMaskedUint32x16: - return rewriteValueAMD64_OpRotateRightMaskedUint32x16(v) - case OpRotateRightMaskedUint32x4: - return rewriteValueAMD64_OpRotateRightMaskedUint32x4(v) - case OpRotateRightMaskedUint32x8: - return rewriteValueAMD64_OpRotateRightMaskedUint32x8(v) - case OpRotateRightMaskedUint64x2: - return rewriteValueAMD64_OpRotateRightMaskedUint64x2(v) - case OpRotateRightMaskedUint64x4: - return 
rewriteValueAMD64_OpRotateRightMaskedUint64x4(v) - case OpRotateRightMaskedUint64x8: - return rewriteValueAMD64_OpRotateRightMaskedUint64x8(v) case OpRotateRightUint32x16: v.Op = OpAMD64VPRORVD512 return true @@ -4415,18 +3143,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v) case OpRoundToEvenScaledFloat64x8: return rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v) - case OpRoundToEvenScaledMaskedFloat32x16: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v) - case OpRoundToEvenScaledMaskedFloat32x4: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v) - case OpRoundToEvenScaledMaskedFloat32x8: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v) - case OpRoundToEvenScaledMaskedFloat64x2: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v) - case OpRoundToEvenScaledMaskedFloat64x4: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v) - case OpRoundToEvenScaledMaskedFloat64x8: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v) case OpRoundToEvenScaledResidueFloat32x16: return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v) case OpRoundToEvenScaledResidueFloat32x4: @@ -4439,18 +3155,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v) case OpRoundToEvenScaledResidueFloat64x8: return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v) - case OpRoundToEvenScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v) - case OpRoundToEvenScaledResidueMaskedFloat32x4: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v) - case OpRoundToEvenScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v) - case OpRoundToEvenScaledResidueMaskedFloat64x2: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v) - case OpRoundToEvenScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v) - case OpRoundToEvenScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v) case OpRsh16Ux16: return rewriteValueAMD64_OpRsh16Ux16(v) case OpRsh16Ux32: @@ -4533,18 +3237,6 @@ func rewriteValueAMD64(v *Value) bool { case OpScaleFloat64x8: v.Op = OpAMD64VSCALEFPD512 return true - case OpScaleMaskedFloat32x16: - return rewriteValueAMD64_OpScaleMaskedFloat32x16(v) - case OpScaleMaskedFloat32x4: - return rewriteValueAMD64_OpScaleMaskedFloat32x4(v) - case OpScaleMaskedFloat32x8: - return rewriteValueAMD64_OpScaleMaskedFloat32x8(v) - case OpScaleMaskedFloat64x2: - return rewriteValueAMD64_OpScaleMaskedFloat64x2(v) - case OpScaleMaskedFloat64x4: - return rewriteValueAMD64_OpScaleMaskedFloat64x4(v) - case OpScaleMaskedFloat64x8: - return rewriteValueAMD64_OpScaleMaskedFloat64x8(v) case OpSelect0: return rewriteValueAMD64_OpSelect0(v) case OpSelect1: @@ -4688,42 +3380,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftConcatInt64x8: v.Op = OpAMD64VPSHLDQ512 return true - case OpShiftAllLeftConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v) - case OpShiftAllLeftConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v) - case OpShiftAllLeftConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v) - case OpShiftAllLeftConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v) - case 
OpShiftAllLeftConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v) - case OpShiftAllLeftConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v) - case OpShiftAllLeftConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v) - case OpShiftAllLeftConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v) - case OpShiftAllLeftConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v) - case OpShiftAllLeftConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v) - case OpShiftAllLeftConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v) - case OpShiftAllLeftConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v) - case OpShiftAllLeftConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v) - case OpShiftAllLeftConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v) - case OpShiftAllLeftConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v) - case OpShiftAllLeftConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v) - case OpShiftAllLeftConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v) - case OpShiftAllLeftConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v) case OpShiftAllLeftConcatUint16x16: v.Op = OpAMD64VPSHLDW256 return true @@ -4778,42 +3434,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt64x8: v.Op = OpAMD64VPSLLQ512 return true - case OpShiftAllLeftMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v) - case OpShiftAllLeftMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v) - case OpShiftAllLeftMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v) - case OpShiftAllLeftMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v) - case OpShiftAllLeftMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v) - case OpShiftAllLeftMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v) - case OpShiftAllLeftMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v) - case OpShiftAllLeftMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v) - case OpShiftAllLeftMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v) - case OpShiftAllLeftMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v) - case OpShiftAllLeftMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v) - case OpShiftAllLeftMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v) - case OpShiftAllLeftMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v) - case OpShiftAllLeftMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v) - case OpShiftAllLeftMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v) - case OpShiftAllLeftMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v) - case OpShiftAllLeftMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v) - case OpShiftAllLeftMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v) case OpShiftAllLeftUint16x16: v.Op = OpAMD64VPSLLW256 return true @@ -4868,42 +3488,6 @@ func rewriteValueAMD64(v *Value) bool { 
case OpShiftAllRightConcatInt64x8: v.Op = OpAMD64VPSHRDQ512 return true - case OpShiftAllRightConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v) - case OpShiftAllRightConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v) - case OpShiftAllRightConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v) - case OpShiftAllRightConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v) - case OpShiftAllRightConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v) - case OpShiftAllRightConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v) - case OpShiftAllRightConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v) - case OpShiftAllRightConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v) - case OpShiftAllRightConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v) - case OpShiftAllRightConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v) - case OpShiftAllRightConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v) - case OpShiftAllRightConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v) - case OpShiftAllRightConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v) - case OpShiftAllRightConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v) - case OpShiftAllRightConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v) - case OpShiftAllRightConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v) - case OpShiftAllRightConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v) - case OpShiftAllRightConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v) case OpShiftAllRightConcatUint16x16: v.Op = OpAMD64VPSHRDW256 return true @@ -4958,42 +3542,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightInt64x8: v.Op = OpAMD64VPSRAQ512 return true - case OpShiftAllRightMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v) - case OpShiftAllRightMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v) - case OpShiftAllRightMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v) - case OpShiftAllRightMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v) - case OpShiftAllRightMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v) - case OpShiftAllRightMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v) - case OpShiftAllRightMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v) - case OpShiftAllRightMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v) - case OpShiftAllRightMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v) - case OpShiftAllRightMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v) - case OpShiftAllRightMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v) - case OpShiftAllRightMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v) - case OpShiftAllRightMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v) - case OpShiftAllRightMaskedUint32x4: 
- return rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v) - case OpShiftAllRightMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v) - case OpShiftAllRightMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v) - case OpShiftAllRightMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v) - case OpShiftAllRightMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) case OpShiftAllRightUint16x16: v.Op = OpAMD64VPSRLW256 return true @@ -5048,42 +3596,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftConcatInt64x8: v.Op = OpAMD64VPSHLDVQ512 return true - case OpShiftLeftConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v) - case OpShiftLeftConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v) - case OpShiftLeftConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v) - case OpShiftLeftConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v) - case OpShiftLeftConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v) - case OpShiftLeftConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v) - case OpShiftLeftConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v) - case OpShiftLeftConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v) - case OpShiftLeftConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v) - case OpShiftLeftConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v) - case OpShiftLeftConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v) - case OpShiftLeftConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v) - case OpShiftLeftConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v) - case OpShiftLeftConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v) - case OpShiftLeftConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v) - case OpShiftLeftConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v) - case OpShiftLeftConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v) - case OpShiftLeftConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v) case OpShiftLeftConcatUint16x16: v.Op = OpAMD64VPSHLDVW256 return true @@ -5138,42 +3650,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftInt64x8: v.Op = OpAMD64VPSLLVQ512 return true - case OpShiftLeftMaskedInt16x16: - return rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v) - case OpShiftLeftMaskedInt16x32: - return rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v) - case OpShiftLeftMaskedInt16x8: - return rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v) - case OpShiftLeftMaskedInt32x16: - return rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v) - case OpShiftLeftMaskedInt32x4: - return rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v) - case OpShiftLeftMaskedInt32x8: - return rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v) - case OpShiftLeftMaskedInt64x2: - return rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v) - case OpShiftLeftMaskedInt64x4: - return rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v) - case OpShiftLeftMaskedInt64x8: - return rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v) - case OpShiftLeftMaskedUint16x16: - return 
rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v) - case OpShiftLeftMaskedUint16x32: - return rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v) - case OpShiftLeftMaskedUint16x8: - return rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v) - case OpShiftLeftMaskedUint32x16: - return rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v) - case OpShiftLeftMaskedUint32x4: - return rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v) - case OpShiftLeftMaskedUint32x8: - return rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v) - case OpShiftLeftMaskedUint64x2: - return rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v) - case OpShiftLeftMaskedUint64x4: - return rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v) - case OpShiftLeftMaskedUint64x8: - return rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v) case OpShiftLeftUint16x16: v.Op = OpAMD64VPSLLVW256 return true @@ -5228,42 +3704,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightConcatInt64x8: v.Op = OpAMD64VPSHRDVQ512 return true - case OpShiftRightConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v) - case OpShiftRightConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v) - case OpShiftRightConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v) - case OpShiftRightConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v) - case OpShiftRightConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v) - case OpShiftRightConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v) - case OpShiftRightConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v) - case OpShiftRightConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v) - case OpShiftRightConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v) - case OpShiftRightConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v) - case OpShiftRightConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v) - case OpShiftRightConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v) - case OpShiftRightConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v) - case OpShiftRightConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v) - case OpShiftRightConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v) - case OpShiftRightConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v) - case OpShiftRightConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v) - case OpShiftRightConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v) case OpShiftRightConcatUint16x16: v.Op = OpAMD64VPSHRDVW256 return true @@ -5318,42 +3758,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightInt64x8: v.Op = OpAMD64VPSRAVQ512 return true - case OpShiftRightMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v) - case OpShiftRightMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightMaskedInt16x32(v) - case OpShiftRightMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightMaskedInt16x8(v) - case OpShiftRightMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightMaskedInt32x16(v) - case OpShiftRightMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightMaskedInt32x4(v) - case OpShiftRightMaskedInt32x8: - return 
rewriteValueAMD64_OpShiftRightMaskedInt32x8(v) - case OpShiftRightMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightMaskedInt64x2(v) - case OpShiftRightMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightMaskedInt64x4(v) - case OpShiftRightMaskedInt64x8: - return rewriteValueAMD64_OpShiftRightMaskedInt64x8(v) - case OpShiftRightMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightMaskedUint16x16(v) - case OpShiftRightMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightMaskedUint16x32(v) - case OpShiftRightMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightMaskedUint16x8(v) - case OpShiftRightMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightMaskedUint32x16(v) - case OpShiftRightMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightMaskedUint32x4(v) - case OpShiftRightMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightMaskedUint32x8(v) - case OpShiftRightMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightMaskedUint64x2(v) - case OpShiftRightMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v) - case OpShiftRightMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v) case OpShiftRightUint16x16: v.Op = OpAMD64VPSRLVW256 return true @@ -5429,18 +3833,6 @@ func rewriteValueAMD64(v *Value) bool { case OpSqrtFloat64x8: v.Op = OpAMD64VSQRTPD512 return true - case OpSqrtMaskedFloat32x16: - return rewriteValueAMD64_OpSqrtMaskedFloat32x16(v) - case OpSqrtMaskedFloat32x4: - return rewriteValueAMD64_OpSqrtMaskedFloat32x4(v) - case OpSqrtMaskedFloat32x8: - return rewriteValueAMD64_OpSqrtMaskedFloat32x8(v) - case OpSqrtMaskedFloat64x2: - return rewriteValueAMD64_OpSqrtMaskedFloat64x2(v) - case OpSqrtMaskedFloat64x4: - return rewriteValueAMD64_OpSqrtMaskedFloat64x4(v) - case OpSqrtMaskedFloat64x8: - return rewriteValueAMD64_OpSqrtMaskedFloat64x8(v) case OpStaticCall: v.Op = OpAMD64CALLstatic return true @@ -5550,66 +3942,6 @@ func rewriteValueAMD64(v *Value) bool { case OpSubInt8x64: v.Op = OpAMD64VPSUBB512 return true - case OpSubMaskedFloat32x16: - return rewriteValueAMD64_OpSubMaskedFloat32x16(v) - case OpSubMaskedFloat32x4: - return rewriteValueAMD64_OpSubMaskedFloat32x4(v) - case OpSubMaskedFloat32x8: - return rewriteValueAMD64_OpSubMaskedFloat32x8(v) - case OpSubMaskedFloat64x2: - return rewriteValueAMD64_OpSubMaskedFloat64x2(v) - case OpSubMaskedFloat64x4: - return rewriteValueAMD64_OpSubMaskedFloat64x4(v) - case OpSubMaskedFloat64x8: - return rewriteValueAMD64_OpSubMaskedFloat64x8(v) - case OpSubMaskedInt16x16: - return rewriteValueAMD64_OpSubMaskedInt16x16(v) - case OpSubMaskedInt16x32: - return rewriteValueAMD64_OpSubMaskedInt16x32(v) - case OpSubMaskedInt16x8: - return rewriteValueAMD64_OpSubMaskedInt16x8(v) - case OpSubMaskedInt32x16: - return rewriteValueAMD64_OpSubMaskedInt32x16(v) - case OpSubMaskedInt32x4: - return rewriteValueAMD64_OpSubMaskedInt32x4(v) - case OpSubMaskedInt32x8: - return rewriteValueAMD64_OpSubMaskedInt32x8(v) - case OpSubMaskedInt64x2: - return rewriteValueAMD64_OpSubMaskedInt64x2(v) - case OpSubMaskedInt64x4: - return rewriteValueAMD64_OpSubMaskedInt64x4(v) - case OpSubMaskedInt64x8: - return rewriteValueAMD64_OpSubMaskedInt64x8(v) - case OpSubMaskedInt8x16: - return rewriteValueAMD64_OpSubMaskedInt8x16(v) - case OpSubMaskedInt8x32: - return rewriteValueAMD64_OpSubMaskedInt8x32(v) - case OpSubMaskedInt8x64: - return rewriteValueAMD64_OpSubMaskedInt8x64(v) - case OpSubMaskedUint16x16: - return rewriteValueAMD64_OpSubMaskedUint16x16(v) - case OpSubMaskedUint16x32: - return 
rewriteValueAMD64_OpSubMaskedUint16x32(v) - case OpSubMaskedUint16x8: - return rewriteValueAMD64_OpSubMaskedUint16x8(v) - case OpSubMaskedUint32x16: - return rewriteValueAMD64_OpSubMaskedUint32x16(v) - case OpSubMaskedUint32x4: - return rewriteValueAMD64_OpSubMaskedUint32x4(v) - case OpSubMaskedUint32x8: - return rewriteValueAMD64_OpSubMaskedUint32x8(v) - case OpSubMaskedUint64x2: - return rewriteValueAMD64_OpSubMaskedUint64x2(v) - case OpSubMaskedUint64x4: - return rewriteValueAMD64_OpSubMaskedUint64x4(v) - case OpSubMaskedUint64x8: - return rewriteValueAMD64_OpSubMaskedUint64x8(v) - case OpSubMaskedUint8x16: - return rewriteValueAMD64_OpSubMaskedUint8x16(v) - case OpSubMaskedUint8x32: - return rewriteValueAMD64_OpSubMaskedUint8x32(v) - case OpSubMaskedUint8x64: - return rewriteValueAMD64_OpSubMaskedUint8x64(v) case OpSubPairsFloat32x4: v.Op = OpAMD64VHSUBPS128 return true @@ -5673,30 +4005,6 @@ func rewriteValueAMD64(v *Value) bool { case OpSubSaturatedInt8x64: v.Op = OpAMD64VPSUBSB512 return true - case OpSubSaturatedMaskedInt16x16: - return rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v) - case OpSubSaturatedMaskedInt16x32: - return rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v) - case OpSubSaturatedMaskedInt16x8: - return rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v) - case OpSubSaturatedMaskedInt8x16: - return rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v) - case OpSubSaturatedMaskedInt8x32: - return rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v) - case OpSubSaturatedMaskedInt8x64: - return rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v) - case OpSubSaturatedMaskedUint16x16: - return rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v) - case OpSubSaturatedMaskedUint16x32: - return rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v) - case OpSubSaturatedMaskedUint16x8: - return rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v) - case OpSubSaturatedMaskedUint8x16: - return rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v) - case OpSubSaturatedMaskedUint8x32: - return rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v) - case OpSubSaturatedMaskedUint8x64: - return rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v) case OpSubSaturatedUint16x16: v.Op = OpAMD64VPSUBUSW256 return true @@ -5794,18 +4102,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpTruncScaledFloat64x4(v) case OpTruncScaledFloat64x8: return rewriteValueAMD64_OpTruncScaledFloat64x8(v) - case OpTruncScaledMaskedFloat32x16: - return rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v) - case OpTruncScaledMaskedFloat32x4: - return rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v) - case OpTruncScaledMaskedFloat32x8: - return rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v) - case OpTruncScaledMaskedFloat64x2: - return rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v) - case OpTruncScaledMaskedFloat64x4: - return rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v) - case OpTruncScaledMaskedFloat64x8: - return rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v) case OpTruncScaledResidueFloat32x16: return rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v) case OpTruncScaledResidueFloat32x4: @@ -5818,18 +4114,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v) case OpTruncScaledResidueFloat64x8: return rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v) - case OpTruncScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v) - case OpTruncScaledResidueMaskedFloat32x4: - return 
rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v) - case OpTruncScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v) - case OpTruncScaledResidueMaskedFloat64x2: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v) - case OpTruncScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v) - case OpTruncScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v) case OpWB: v.Op = OpAMD64LoweredWB return true @@ -5881,30 +4165,6 @@ func rewriteValueAMD64(v *Value) bool { case OpXorInt8x64: v.Op = OpAMD64VPXORD512 return true - case OpXorMaskedInt32x16: - return rewriteValueAMD64_OpXorMaskedInt32x16(v) - case OpXorMaskedInt32x4: - return rewriteValueAMD64_OpXorMaskedInt32x4(v) - case OpXorMaskedInt32x8: - return rewriteValueAMD64_OpXorMaskedInt32x8(v) - case OpXorMaskedInt64x2: - return rewriteValueAMD64_OpXorMaskedInt64x2(v) - case OpXorMaskedInt64x4: - return rewriteValueAMD64_OpXorMaskedInt64x4(v) - case OpXorMaskedInt64x8: - return rewriteValueAMD64_OpXorMaskedInt64x8(v) - case OpXorMaskedUint32x16: - return rewriteValueAMD64_OpXorMaskedUint32x16(v) - case OpXorMaskedUint32x4: - return rewriteValueAMD64_OpXorMaskedUint32x4(v) - case OpXorMaskedUint32x8: - return rewriteValueAMD64_OpXorMaskedUint32x8(v) - case OpXorMaskedUint64x2: - return rewriteValueAMD64_OpXorMaskedUint64x2(v) - case OpXorMaskedUint64x4: - return rewriteValueAMD64_OpXorMaskedUint64x4(v) - case OpXorMaskedUint64x8: - return rewriteValueAMD64_OpXorMaskedUint64x8(v) case OpXorUint16x16: v.Op = OpAMD64VPXOR256 return true @@ -27893,66 +26153,6 @@ func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28007,66 +26207,6 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) - // result: 
(VPSLLQMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28121,66 +26261,6 @@ func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28235,66 +26315,6 @@ func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked128 x (MOVQconst [c]) mask) - // result: (VPSRADMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRADMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked256(v 
*Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked256 x (MOVQconst [c]) mask) - // result: (VPSRADMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRADMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked512 x (MOVQconst [c]) mask) - // result: (VPSRADMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRADMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28349,66 +26369,6 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28463,66 +26423,6 @@ func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAWMasked256const) - 
v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -29423,1134 +27323,6 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool { } return false } -func rewriteValueAMD64_OpAbsMaskedInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt16x16 x mask) - // result: (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt16x32 x mask) - // result: (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt16x8 x mask) - // result: (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt32x16 x mask) - // result: (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt32x4 x mask) - // result: (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt32x8 x mask) - // result: (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt64x2 x mask) - // result: (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return 
true - } -} -func rewriteValueAMD64_OpAbsMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt64x4 x mask) - // result: (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt64x8 x mask) - // result: (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt8x16 x mask) - // result: (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt8x32 x mask) - // result: (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt8x64 x mask) - // result: (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) - // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) - // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) - // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - 
return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleMaskedInt32x16 x y z mask) - // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleMaskedInt32x4 x y z mask) - // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleMaskedInt32x8 x y z mask) - // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) - // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) - // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) - // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat32x16 x y mask) - // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat32x4 x y mask) - // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat32x8 x y mask) - // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat64x2 x y mask) - // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat64x4 x y mask) - // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat64x8 x y mask) - // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt16x16 x y mask) - // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt16x32 x y mask) - // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt16x8 x y mask) - // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } 
-} -func rewriteValueAMD64_OpAddMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt32x16 x y mask) - // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt32x4 x y mask) - // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt32x8 x y mask) - // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt64x2 x y mask) - // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt64x4 x y mask) - // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt64x8 x y mask) - // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt8x16 x y mask) - // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt8x32 x y mask) - // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt8x64(v *Value) bool { 
- v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt8x64 x y mask) - // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint16x16 x y mask) - // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint16x32 x y mask) - // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint16x8 x y mask) - // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint32x16 x y mask) - // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint32x4 x y mask) - // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint32x8 x y mask) - // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint64x2 x y mask) - // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - 
v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint64x4 x y mask) - // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint64x8 x y mask) - // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint8x16 x y mask) - // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint8x32 x y mask) - // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint8x64 x y mask) - // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt16x16 x y mask) - // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt16x32 x y mask) - // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt16x8 x y mask) - // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - 
v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt8x16 x y mask) - // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt8x32 x y mask) - // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt8x64 x y mask) - // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint16x16 x y mask) - // result: (VPADDUSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint16x32 x y mask) - // result: (VPADDUSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint16x8 x y mask) - // result: (VPADDUSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint8x16 x y mask) - // result: (VPADDUSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint8x32 x y mask) - // result: (VPADDUSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint8x64 x y mask) - // result: (VPADDUSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpAddr(v *Value) bool { v_0 := v.Args[0] // match: (Addr {sym} base) @@ -30564,438 +27336,6 @@ func rewriteValueAMD64_OpAddr(v *Value) bool { return true } } -func rewriteValueAMD64_OpAndMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt32x16 x y mask) - // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt32x4 x y mask) - // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt32x8 x y mask) - // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt64x2 x y mask) - // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt64x4 x y mask) - // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt64x8 x y mask) - // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint32x16 x y mask) - // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := 
v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint32x4 x y mask) - // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint32x8 x y mask) - // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint64x2 x y mask) - // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint64x4 x y mask) - // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint64x8 x y mask) - // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt32x16 x y mask) - // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt32x4 x y mask) - // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt32x8 x y mask) - // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 
- v.reset(OpAMD64VPANDNDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt64x2 x y mask) - // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt64x4 x y mask) - // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt64x8 x y mask) - // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint32x16 x y mask) - // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint32x4 x y mask) - // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint32x8 x y mask) - // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint64x2 x y mask) - // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint64x4 x y mask) - // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - 
mask := v_2 - v.reset(OpAMD64VPANDNQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint64x8 x y mask) - // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -31361,114 +27701,6 @@ func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool { return true } } -func rewriteValueAMD64_OpAverageMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint16x16 x y mask) - // result: (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint16x32 x y mask) - // result: (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint16x8 x y mask) - // result: (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint8x16 x y mask) - // result: (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint8x32 x y mask) - // result: (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint8x64 x y mask) - // result: (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpBitLen16(v *Value) bool { v_0 := 
v.Args[0] b := v.Block @@ -31646,486 +27878,6 @@ func rewriteValueAMD64_OpBitLen8(v *Value) bool { } return false } -func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedFloat32x4 x mask) - // result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedFloat64x2 x mask) - // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt16x8 x mask) - // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt32x4 x mask) - // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt64x2 x mask) - // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt8x16 x mask) - // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint16x8 x mask) - // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint32x4 x mask) - // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - 
} -} -func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint64x2 x mask) - // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint8x16 x mask) - // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedFloat32x4 x mask) - // result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedFloat64x2 x mask) - // result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt16x8 x mask) - // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt32x4 x mask) - // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt64x2 x mask) - // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt8x16 x mask) - // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := 
v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint16x8 x mask) - // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint32x4 x mask) - // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint64x2 x mask) - // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint8x16 x mask) - // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedFloat32x4 x mask) - // result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedFloat64x2 x mask) - // result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt16x8 x mask) - // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt32x4 x mask) - // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt64x2 x mask) - // result: (VPBROADCASTQMasked512 
x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt8x16 x mask) - // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint16x8 x mask) - // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint32x4 x mask) - // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint64x2 x mask) - // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint8x16 x mask) - // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpBswap16(v *Value) bool { v_0 := v.Args[0] // match: (Bswap16 x) @@ -32276,114 +28028,6 @@ func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, 
v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (CeilScaledResidueFloat32x16 [a] x) @@ -32462,114 +28106,6 @@ func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM 
mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -33961,102 +29497,6 @@ func rewriteValueAMD64_OpConstNil(v *Value) bool { return true } } -func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToInt32MaskedFloat32x16 x mask) - // result: (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToInt32MaskedFloat32x4 x mask) - // result: (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToInt32MaskedFloat32x8 x mask) - // result: (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToUint32MaskedFloat32x16 x mask) - // result: (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 
- mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToUint32MaskedFloat32x4 x mask) - // result: (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToUint32MaskedFloat32x8 x mask) - // result: (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpCtz16(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -34813,222 +30253,6 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool { return true } } -func rewriteValueAMD64_OpDivMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat32x16 x y mask) - // result: (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat32x4 x y mask) - // result: (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat32x8 x y mask) - // result: (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat64x2 x y mask) - // result: (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat64x4 x y mask) - // result: (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block 
- // match: (DivMaskedFloat64x8 x y mask) - // result: (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsMaskedInt16x16 x y mask) - // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsMaskedInt16x32 x y mask) - // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsMaskedInt16x8 x y mask) - // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsSaturatedMaskedUint8x16 x y mask) - // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsSaturatedMaskedUint8x32 x y mask) - // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsSaturatedMaskedUint8x64 x y mask) - // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpEq16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -35317,666 +30541,6 @@ func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat32x16 x y mask) - // result: 
(VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (EqualMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] 
- v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt 
= uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -36678,114 +31242,6 @@ func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (FloorScaledResidueFloat32x16 [a] x) @@ -36864,288 +31320,6 @@ func 
rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 
- v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldMulMaskedUint8x16 x y mask) - // result: (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } 
-} -func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldMulMaskedUint8x32 x y mask) - // result: (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldMulMaskedUint8x64 x y mask) - // result: (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpGetG(v *Value) bool { v_0 := v.Args[0] // match: (GetG mem) @@ -37806,666 +31980,6 @@ func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) 
- v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt8x16 x y mask) - // result: 
(VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v *Value) 
bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38698,666 +32212,6 @@ func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (GreaterMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpGreaterMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM 
mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39555,138 +32409,6 @@ func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpIsNanMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return 
true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpIsNonNil(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -40201,666 +32923,6 @@ func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask 
:= v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt16x16 x y mask) 
- // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] 
- v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpLessEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - 
v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41097,666 +33159,6 @@ func rewriteValueAMD64_OpLessInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat64x8(v *Value) bool { - v_2 := 
v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpLessMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - 
v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) 
- v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -43056,546 +34458,6 @@ func rewriteValueAMD64_OpMax64F(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaxMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat32x16 x y mask) - // result: (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat32x4 x y mask) - // result: (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat32x8 x y mask) - // result: (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat64x2 x y mask) - // result: (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat64x4 x y mask) - // result: (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat64x8 x y mask) - // result: (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt16x16 x y mask) - // result: (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt16x32 x y mask) - // result: (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt16x8 x y mask) - // result: (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt32x16 x y mask) - // result: (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt32x4 x y mask) - // result: (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt32x8 x y mask) - // result: (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt64x2 x y mask) - // result: (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt64x4 x y mask) - // result: (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) 
- return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt64x8 x y mask) - // result: (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt8x16 x y mask) - // result: (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt8x32 x y mask) - // result: (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt8x64 x y mask) - // result: (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint16x16 x y mask) - // result: (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint16x32 x y mask) - // result: (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint16x8 x y mask) - // result: (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint32x16 x y mask) - // result: (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpMaxMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint32x4 x y mask) - // result: (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint32x8 x y mask) - // result: (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint64x2 x y mask) - // result: (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint64x4 x y mask) - // result: (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint64x8 x y mask) - // result: (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint8x16 x y mask) - // result: (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint8x32 x y mask) - // result: (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint8x64 x y mask) - // result: (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpMin32F(v 
*Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -43634,546 +34496,6 @@ func rewriteValueAMD64_OpMin64F(v *Value) bool { return true } } -func rewriteValueAMD64_OpMinMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat32x16 x y mask) - // result: (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat32x4 x y mask) - // result: (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat32x8 x y mask) - // result: (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat64x2 x y mask) - // result: (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat64x4 x y mask) - // result: (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat64x8 x y mask) - // result: (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt16x16 x y mask) - // result: (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt16x32 x y mask) - // result: (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt16x8 x y mask) - // result: (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt32x16 x y mask) - // result: (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt32x4 x y mask) - // result: (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt32x8 x y mask) - // result: (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt64x2 x y mask) - // result: (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt64x4 x y mask) - // result: (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt64x8 x y mask) - // result: (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt8x16 x y mask) - // result: (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt8x32 x y mask) - // result: (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt8x64 x y mask) - // result: (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint16x16 x y mask) - // result: (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint16x32 x y mask) - // result: (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint16x8 x y mask) - // result: (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint32x16 x y mask) - // result: (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint32x4 x y mask) - // result: (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint32x8 x y mask) - // result: (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint64x2 x y mask) - // result: (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint64x4 x y mask) - // result: (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint64x8 x y mask) - // result: (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint8x16 x y mask) - // result: (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint8x32 x y mask) - // result: (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint8x64 x y mask) - // result: (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpMod16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -44683,906 +35005,6 @@ func rewriteValueAMD64_OpMove(v *Value) bool { } return false } -func rewriteValueAMD64_OpMulAddMaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat32x16 x y z mask) - // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: 
(MulAddMaskedFloat32x4 x y z mask) - // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat32x8 x y z mask) - // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat64x2 x y z mask) - // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat64x4 x y z mask) - // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat64x8 x y z mask) - // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat32x16 x y z mask) - // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat32x4 x y z mask) - // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat32x8 x y z mask) - // result: 
(VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat64x2 x y z mask) - // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat64x4 x y z mask) - // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat64x8 x y z mask) - // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedInt16x16 x y mask) - // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedInt16x32 x y mask) - // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedInt16x8 x y mask) - // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedUint16x16 x y mask) - // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) 
- v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedUint16x32 x y mask) - // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedUint16x8 x y mask) - // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat32x16 x y mask) - // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat32x4 x y mask) - // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat32x8 x y mask) - // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat64x2 x y mask) - // result: (VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat64x4 x y mask) - // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat64x8 x y mask) - // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt16x16 x y mask) - // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt16x32 x y mask) - // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt16x8 x y mask) - // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt32x16 x y mask) - // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt32x4 x y mask) - // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt32x8 x y mask) - // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt64x2 x y mask) - // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt64x4 x y mask) - // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } 
-} -func rewriteValueAMD64_OpMulMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt64x8 x y mask) - // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint16x16 x y mask) - // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint16x32 x y mask) - // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint16x8 x y mask) - // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint32x16 x y mask) - // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint32x4 x y mask) - // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint32x8 x y mask) - // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint64x2 x y mask) - // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpMulMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint64x4 x y mask) - // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint64x8 x y mask) - // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat32x16 x y z mask) - // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat32x4 x y z mask) - // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat32x8 x y z mask) - // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat64x2 x y z mask) - // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat64x4 x y z mask) - // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := 
v.Block - // match: (MulSubAddMaskedFloat64x8 x y z mask) - // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} func rewriteValueAMD64_OpNeg32F(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -45917,666 +35339,6 @@ func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - 
v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 
:= b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 
[4] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (NotEqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -46679,1638 +35441,6 @@ func rewriteValueAMD64_OpOffPtr(v *Value) bool { return true } } -func rewriteValueAMD64_OpOnesCountMaskedInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt16x16 x mask) - // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt16x32 x mask) - // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt16x8 x mask) - // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt32x16 x mask) - // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt32x4 x mask) - // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt32x8 x mask) - // result: (VPOPCNTDMasked256 x 
(VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt64x2 x mask) - // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt64x4 x mask) - // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt64x8 x mask) - // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt8x16 x mask) - // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt8x32 x mask) - // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt8x64 x mask) - // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint16x16 x mask) - // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint16x32 x mask) - // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} 
-func rewriteValueAMD64_OpOnesCountMaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint16x8 x mask) - // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint32x16 x mask) - // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint32x4 x mask) - // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint32x8 x mask) - // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint64x2 x mask) - // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint64x4 x mask) - // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint64x8 x mask) - // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint8x16 x mask) - // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint8x32 x mask) - // result: (VPOPCNTBMasked256 x 
(VPMOVVec8x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint8x64 x mask) - // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt32x16 x y mask) - // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt32x4 x y mask) - // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt32x8 x y mask) - // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt64x2 x y mask) - // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt64x4 x y mask) - // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt64x8 x y mask) - // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint32x16 x y mask) - // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint32x4 x y mask) - // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint32x8 x y mask) - // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint64x2 x y mask) - // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint64x4 x y mask) - // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint64x8 x y mask) - // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat32x16 x y z mask) - // result: (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat32x4 x y z mask) - // result: (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat32x8 x y z mask) - // result: (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM mask)) - 
for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat64x2 x y z mask) - // result: (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat64x4 x y z mask) - // result: (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat64x8 x y z mask) - // result: (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt16x16 x y z mask) - // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt16x32 x y z mask) - // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt16x8 x y z mask) - // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt32x16 x y z mask) - // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked512) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt32x4 x y z mask) - // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt32x8 x y z mask) - // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt64x2 x y z mask) - // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt64x4 x y z mask) - // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt64x8 x y z mask) - // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt8x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt8x16 x y z mask) - // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt8x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt8x32 x y z mask) - // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func 
rewriteValueAMD64_OpPermute2MaskedInt8x64(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt8x64 x y z mask) - // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint16x16 x y z mask) - // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint16x32 x y z mask) - // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint16x8 x y z mask) - // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint32x16 x y z mask) - // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint32x4 x y z mask) - // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint32x8 x y z mask) - // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - 
v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint64x2 x y z mask) - // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint64x4 x y z mask) - // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint64x8 x y z mask) - // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint8x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint8x16 x y z mask) - // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint8x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint8x32 x y z mask) - // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint8x64(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint8x64 x y z mask) - // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat32x16 x y mask) - // result: (VPERMPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat32x8 x y mask) - // result: (VPERMPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - 
v.reset(OpAMD64VPERMPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat64x4 x y mask) - // result: (VPERMPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat64x8 x y mask) - // result: (VPERMPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt16x16 x y mask) - // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt16x32 x y mask) - // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt16x8 x y mask) - // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt32x16 x y mask) - // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt32x8 x y mask) - // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt64x4 x y mask) - // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y 
:= v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt64x8 x y mask) - // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt8x16 x y mask) - // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt8x32 x y mask) - // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt8x64 x y mask) - // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint16x16 x y mask) - // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint16x32 x y mask) - // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint16x8 x y mask) - // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint32x16 x y mask) - // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) - for 
{ - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint32x8 x y mask) - // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint64x4 x y mask) - // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint64x8 x y mask) - // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint8x16 x y mask) - // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint8x32 x y mask) - // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint8x64 x y mask) - // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpPopCount16(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -48341,1062 +35471,6 @@ func rewriteValueAMD64_OpPopCount8(v *Value) bool { return true } } -func rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat32x16 x mask) - // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v *Value) bool { - v_1 
:= v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat32x4 x mask) - // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat32x8 x mask) - // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat64x2 x mask) - // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat64x4 x mask) - // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat64x8 x mask) - // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat32x16 x mask) - // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat32x4 x mask) - // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat32x8 x mask) - // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat64x2 x mask) - // result: (VRSQRT14PDMasked128 x 
(VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat64x4 x mask) - // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat64x8 x mask) - // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt32x16 [a] x mask) - // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt32x4 [a] x mask) - // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt32x8 [a] x mask) - // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt64x2 [a] x mask) - // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt64x4 [a] x mask) - // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func 
rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt64x8 [a] x mask) - // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint32x16 [a] x mask) - // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint32x4 [a] x mask) - // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint32x8 [a] x mask) - // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint64x2 [a] x mask) - // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint64x4 [a] x mask) - // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint64x8 [a] x mask) - // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - 
// match: (RotateAllRightMaskedInt32x16 [a] x mask) - // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt32x4 [a] x mask) - // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt32x8 [a] x mask) - // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt64x2 [a] x mask) - // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt64x4 [a] x mask) - // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt64x8 [a] x mask) - // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint32x16 [a] x mask) - // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint32x4 [a] x mask) - // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) - 
for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint32x8 [a] x mask) - // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint64x2 [a] x mask) - // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint64x4 [a] x mask) - // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint64x8 [a] x mask) - // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt32x16 x y mask) - // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt32x4 x y mask) - // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt32x8 x y mask) - // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, 
v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt64x2 x y mask) - // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt64x4 x y mask) - // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt64x8 x y mask) - // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint32x16 x y mask) - // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint32x4 x y mask) - // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint32x8 x y mask) - // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint64x2 x y mask) - // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint64x4 x y mask) - // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked256) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint64x8 x y mask) - // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt32x16 x y mask) - // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt32x4 x y mask) - // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt32x8 x y mask) - // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt64x2 x y mask) - // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt64x4 x y mask) - // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt64x8 x y mask) - // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint32x16 x y mask) - // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 
- y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint32x4 x y mask) - // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint32x8 x y mask) - // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint64x2 x y mask) - // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint64x4 x y mask) - // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint64x8 x y mask) - // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpRoundToEven(v *Value) bool { v_0 := v.Args[0] // match: (RoundToEven x) @@ -49535,114 +35609,6 @@ func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) 
- v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (RoundToEvenScaledResidueFloat32x16 [a] x) @@ -49721,114 +35687,6 @@ func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func 
rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -51157,114 +37015,6 @@ func rewriteValueAMD64_OpRsh8x8(v *Value) bool { } return false } -func rewriteValueAMD64_OpScaleMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat32x16 x y mask) - // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat32x4 x y mask) - // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat32x8 x y mask) - // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat64x2 x y mask) - // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat64x4 x y mask) - // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat64x8 x y mask) - // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpSelect0(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -52250,2742 +38000,6 @@ func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := 
v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] 
- v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt16x16 x y mask) - // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt16x32 x y mask) - // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt16x8 x y mask) - // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt32x16 x y mask) - // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt32x4 x y mask) - // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt32x8 x y mask) - // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: 
(ShiftAllLeftMaskedInt64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint16x16 x y mask) - // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint16x32 x y mask) - // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint16x8 x y mask) - // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint32x16 x y mask) - // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint32x4 x y mask) - // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint32x8 x y mask) - // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := 
v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask) - // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask) - // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask) - // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask) - // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask) - // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask) - // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 
:= b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask) - // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask) - // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask) - // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) - // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: 
(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) - // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) - // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt16x16 x y mask) - // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt16x32 x y mask) - // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt16x8 x y mask) - // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt32x16 x y mask) - // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRADMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt32x4 x y mask) - // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRADMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt32x8 x y mask) - // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRADMasked256) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt64x2 x y mask) - // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt64x4 x y mask) - // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt64x8 x y mask) - // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint16x16 x y mask) - // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint16x32 x y mask) - // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint16x8 x y mask) - // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint32x16 x y mask) - // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint32x4 x y mask) - // result: 
(VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint32x8 x y mask) - // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt16x16 x y z mask) - // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt16x32 x y z mask) - // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt16x8 x y z mask) - // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt32x16 x y z mask) - // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt32x4 x y z mask) - // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt32x8 x y z mask) - // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt64x2 x y z mask) - // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt64x4 x y z mask) - // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt64x8 x y z mask) - // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint16x16 x y z mask) - // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true 
- } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint16x32 x y z mask) - // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint16x8 x y z mask) - // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint32x16 x y z mask) - // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint32x4 x y z mask) - // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint32x8 x y z mask) - // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint64x2 x y z mask) - // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint64x4 x y z mask) - // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func 
rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint64x8 x y z mask) - // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt16x16 x y mask) - // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt16x32 x y mask) - // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt16x8 x y mask) - // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt32x16 x y mask) - // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt32x4 x y mask) - // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt32x8 x y mask) - // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt64x2 x y mask) - // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked128) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt64x4 x y mask) - // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt64x8 x y mask) - // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint16x16 x y mask) - // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint16x32 x y mask) - // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint16x8 x y mask) - // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint32x16 x y mask) - // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint32x4 x y mask) - // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint32x8 x y mask) - // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := 
v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint64x2 x y mask) - // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint64x4 x y mask) - // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint64x8 x y mask) - // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt16x16 x y z mask) - // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt16x32 x y z mask) - // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt16x8 x y z mask) - // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt32x16 x y z mask) - // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - 
return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt32x4 x y z mask) - // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt32x8 x y z mask) - // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt64x2 x y z mask) - // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt64x4 x y z mask) - // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt64x8 x y z mask) - // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint16x16 x y z mask) - // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint16x32 x y z mask) - // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} 
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint16x8 x y z mask) - // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint32x16 x y z mask) - // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint32x4 x y z mask) - // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint32x8 x y z mask) - // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint64x2 x y z mask) - // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint64x4 x y z mask) - // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint64x8 x y z mask) - // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func 
rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt64x4 x y mask) - // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint16x16 x y mask) - // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint16x32 x y mask) - // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint16x8 x y mask) - // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint32x16 x y mask) - // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint32x4 x y mask) - // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint32x8 x y mask) - // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint64x2 x y mask) - // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := 
v_2 - v.reset(OpAMD64VPSRLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint64x4 x y mask) - // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint64x8 x y mask) - // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpSlicemask(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -55040,102 +38054,6 @@ func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool { return true } } -func rewriteValueAMD64_OpSqrtMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat32x16 x mask) - // result: (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat32x4 x mask) - // result: (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat32x8 x mask) - // result: (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat64x2 x mask) - // result: (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat64x4 x mask) - // result: (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat64x8 x mask) - // result: (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - 
v.reset(OpAMD64VSQRTPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpStore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -55673,762 +38591,6 @@ func rewriteValueAMD64_OpStoreMasked8(v *Value) bool { } return false } -func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat32x16 x y mask) - // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat32x4 x y mask) - // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat32x8 x y mask) - // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat64x2 x y mask) - // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat64x4 x y mask) - // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat64x8 x y mask) - // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt16x16 x y mask) - // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block 
- // match: (SubMaskedInt16x32 x y mask) - // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt16x8 x y mask) - // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt32x16 x y mask) - // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt32x4 x y mask) - // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt32x8 x y mask) - // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt64x2 x y mask) - // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt64x4 x y mask) - // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt64x8 x y mask) - // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt8x16 x y mask) - // result: 
(VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt8x32 x y mask) - // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt8x64 x y mask) - // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint16x16 x y mask) - // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint16x32 x y mask) - // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint16x8 x y mask) - // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint32x16 x y mask) - // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint32x4 x y mask) - // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint32x8 x y mask) - // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM 
mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint64x2 x y mask) - // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint64x4 x y mask) - // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint64x8 x y mask) - // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint8x16 x y mask) - // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint8x32 x y mask) - // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint8x64 x y mask) - // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt16x16 x y mask) - // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt16x32 x y mask) - // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) - 
for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt16x8 x y mask) - // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt8x16 x y mask) - // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt8x32 x y mask) - // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt8x64 x y mask) - // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint16x16 x y mask) - // result: (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint16x32 x y mask) - // result: (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint16x8 x y mask) - // result: (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := 
v.Block - // match: (SubSaturatedMaskedUint8x16 x y mask) - // result: (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint8x32 x y mask) - // result: (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint8x64 x y mask) - // result: (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpTrunc(v *Value) bool { v_0 := v.Args[0] // match: (Trunc x) @@ -56567,114 +38729,6 @@ func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := 
v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (TruncScaledResidueFloat32x16 [a] x) @@ -56753,330 +38807,6 @@ func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - 
v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt32x16 x y mask) - // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt32x4 x y mask) - // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt32x8 x y mask) - // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt64x2 x y mask) - // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt64x4 x y mask) - // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt64x8 x y mask) - // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint32x16 x y mask) - // result: (VPXORDMasked512 x y 
(VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint32x4 x y mask) - // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint32x8 x y mask) - // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint64x2 x y mask) - // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint64x4 x y mask) - // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint64x8 x y mask) - // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpZero(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 90149300b2c..e6c6874bddc 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -24,18 +24,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int64x2.Abs", opLen1(ssa.OpAbsInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.Abs", opLen1(ssa.OpAbsInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.Abs", opLen1(ssa.OpAbsInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64) @@ -69,51 +57,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.AddMasked", opLen3(ssa.OpAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.AddMasked", opLen3(ssa.OpAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.AddMasked", opLen3(ssa.OpAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddMasked", opLen3(ssa.OpAddMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddMasked", opLen3(ssa.OpAddMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddMasked", opLen3(ssa.OpAddMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.AddMasked", opLen3(ssa.OpAddMaskedInt16x8, types.TypeVec128), sys.AMD64) - 
addF(simdPackage, "Int16x16.AddMasked", opLen3(ssa.OpAddMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.AddMasked", opLen3(ssa.OpAddMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AddMasked", opLen3(ssa.OpAddMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AddMasked", opLen3(ssa.OpAddMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AddMasked", opLen3(ssa.OpAddMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AddMasked", opLen3(ssa.OpAddMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AddMasked", opLen3(ssa.OpAddMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AddMasked", opLen3(ssa.OpAddMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.AddMasked", opLen3(ssa.OpAddMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AddMasked", opLen3(ssa.OpAddMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.AddMasked", opLen3(ssa.OpAddMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.AddMasked", opLen3(ssa.OpAddMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AddMasked", opLen3(ssa.OpAddMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.AddMasked", opLen3(ssa.OpAddMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.AddMasked", opLen3(ssa.OpAddMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AddMasked", opLen3(ssa.OpAddMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.AddMasked", opLen3(ssa.OpAddMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64) @@ -140,18 +89,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64) @@ -180,18 +117,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AndMasked", opLen3(ssa.OpAndMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AndMasked", opLen3(ssa.OpAndMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AndMasked", opLen3(ssa.OpAndMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AndMasked", opLen3(ssa.OpAndMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AndMasked", opLen3(ssa.OpAndMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AndMasked", opLen3(ssa.OpAndMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.AndMasked", opLen3(ssa.OpAndMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AndMasked", opLen3(ssa.OpAndMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.AndMasked", opLen3(ssa.OpAndMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AndNot", opLen2_21(ssa.OpAndNotInt8x64, types.TypeVec512), sys.AMD64) @@ -216,30 +141,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.AndNot", opLen2_21(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.AndNot", opLen2_21(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.AndNot", opLen2_21(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64) @@ -250,16 +157,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64) @@ -270,16 +167,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64) @@ -290,16 +177,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64) @@ -310,24 +187,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64) @@ -361,15 +226,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64) @@ -382,24 +241,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.DivMasked", opLen3(ssa.OpDivMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.DivMasked", opLen3(ssa.OpDivMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.DivMasked", opLen3(ssa.OpDivMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64) @@ -430,36 +277,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, 
"Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64) @@ -500,42 +317,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, 
types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.GetElem", opLen1Imm8(ssa.OpGetElemFloat32x4, types.Types[types.TFLOAT32], 0), sys.AMD64) addF(simdPackage, "Float64x2.GetElem", opLen1Imm8(ssa.OpGetElemFloat64x2, types.Types[types.TFLOAT64], 0), sys.AMD64) addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64) @@ -622,78 +418,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.GreaterEqualMasked", 
opLen3(ssa.OpGreaterEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x2, 
types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64) @@ -722,66 +452,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x2, types.TypeVec128), 
sys.AMD64) - addF(simdPackage, "Uint64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.LessMasked", opLen3(ssa.OpLessMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.LessMasked", opLen3(ssa.OpLessMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.LessMasked", opLen3(ssa.OpLessMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.LessMasked", opLen3(ssa.OpLessMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.LessMasked", opLen3(ssa.OpLessMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.LessMasked", opLen3(ssa.OpLessMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.LessMasked", opLen3(ssa.OpLessMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.LessMasked", opLen3(ssa.OpLessMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.LessMasked", opLen3(ssa.OpLessMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.LessMasked", opLen3(ssa.OpLessMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.LessMasked", opLen3(ssa.OpLessMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.LessMasked", opLen3(ssa.OpLessMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.LessMasked", opLen3(ssa.OpLessMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.LessMasked", opLen3(ssa.OpLessMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.LessMasked", opLen3(ssa.OpLessMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.LessMasked", opLen3(ssa.OpLessMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.LessMasked", opLen3(ssa.OpLessMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.LessMasked", opLen3(ssa.OpLessMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.LessMasked", opLen3(ssa.OpLessMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.LessMasked", opLen3(ssa.OpLessMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.LessMasked", opLen3(ssa.OpLessMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.LessMasked", opLen3(ssa.OpLessMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.LessMasked", opLen3(ssa.OpLessMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.LessMasked", opLen3(ssa.OpLessMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.LessMasked", opLen3(ssa.OpLessMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.LessMasked", opLen3(ssa.OpLessMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.LessMasked", opLen3(ssa.OpLessMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.LessMasked", opLen3(ssa.OpLessMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.LessMasked", opLen3(ssa.OpLessMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.LessMasked", opLen3(ssa.OpLessMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Max", 
opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64) @@ -812,36 +482,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x4, types.TypeVec256), sys.AMD64) - 
addF(simdPackage, "Uint64x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64) @@ -872,36 +512,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MinMasked", opLen3(ssa.OpMinMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MinMasked", opLen3(ssa.OpMinMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MinMasked", opLen3(ssa.OpMinMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MinMasked", opLen3(ssa.OpMinMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MinMasked", opLen3(ssa.OpMinMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MinMasked", opLen3(ssa.OpMinMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MinMasked", opLen3(ssa.OpMinMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MinMasked", opLen3(ssa.OpMinMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MinMasked", opLen3(ssa.OpMinMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MinMasked", opLen3(ssa.OpMinMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MinMasked", opLen3(ssa.OpMinMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MinMasked", opLen3(ssa.OpMinMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MinMasked", opLen3(ssa.OpMinMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MinMasked", opLen3(ssa.OpMinMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MinMasked", opLen3(ssa.OpMinMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MinMasked", opLen3(ssa.OpMinMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MinMasked", opLen3(ssa.OpMinMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MinMasked", opLen3(ssa.OpMinMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MinMasked", opLen3(ssa.OpMinMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MinMasked", opLen3(ssa.OpMinMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MinMasked", opLen3(ssa.OpMinMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MinMasked", opLen3(ssa.OpMinMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MinMasked", opLen3(ssa.OpMinMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MinMasked", opLen3(ssa.OpMinMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MinMasked", opLen3(ssa.OpMinMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MinMasked", opLen3(ssa.OpMinMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MinMasked", opLen3(ssa.OpMinMaskedUint32x16, types.TypeVec512), 
sys.AMD64) - addF(simdPackage, "Uint64x2.MinMasked", opLen3(ssa.OpMinMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MinMasked", opLen3(ssa.OpMinMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MinMasked", opLen3(ssa.OpMinMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64) @@ -932,24 +542,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.MulAdd", opLen3(ssa.OpMulAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulAdd", opLen3(ssa.OpMulAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulAdd", opLen3(ssa.OpMulAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64) @@ -960,48 +558,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MulMasked", opLen3(ssa.OpMulMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MulMasked", opLen3(ssa.OpMulMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MulMasked", opLen3(ssa.OpMulMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MulMasked", opLen3(ssa.OpMulMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MulMasked", opLen3(ssa.OpMulMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MulMasked", opLen3(ssa.OpMulMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MulMasked", opLen3(ssa.OpMulMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MulMasked", opLen3(ssa.OpMulMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MulMasked", opLen3(ssa.OpMulMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MulMasked", opLen3(ssa.OpMulMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MulMasked", opLen3(ssa.OpMulMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MulMasked", opLen3(ssa.OpMulMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MulMasked", opLen3(ssa.OpMulMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MulMasked", opLen3(ssa.OpMulMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MulMasked", opLen3(ssa.OpMulMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x4, 
types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64) @@ -1016,36 +578,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x8, 
types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.OnesCount", opLen1(ssa.OpOnesCountInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.OnesCount", opLen1(ssa.OpOnesCountInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.OnesCount", opLen1(ssa.OpOnesCountInt8x64, types.TypeVec512), sys.AMD64) @@ -1070,30 +602,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.OnesCount", opLen1(ssa.OpOnesCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.OnesCount", opLen1(ssa.OpOnesCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.OnesCount", opLen1(ssa.OpOnesCountUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64) @@ -1118,18 +626,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.OrMasked", opLen3(ssa.OpOrMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.OrMasked", opLen3(ssa.OpOrMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.OrMasked", opLen3(ssa.OpOrMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.OrMasked", opLen3(ssa.OpOrMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.OrMasked", opLen3(ssa.OpOrMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.OrMasked", opLen3(ssa.OpOrMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.OrMasked", opLen3(ssa.OpOrMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.OrMasked", opLen3(ssa.OpOrMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.OrMasked", opLen3(ssa.OpOrMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64) @@ -1184,84 +680,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.Permute2Masked", 
opLen4_231(ssa.OpPermute2MaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Reciprocal", opLen1(ssa.OpReciprocalFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Reciprocal", 
opLen1(ssa.OpReciprocalFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Reciprocal", opLen1(ssa.OpReciprocalFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) @@ -1274,18 +704,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) @@ -1298,18 +716,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64) @@ -1322,18 +728,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64) @@ -1346,18 +740,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x2, types.TypeVec128), sys.AMD64) @@ -1368,36 +750,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Scale", 
opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64) @@ -1484,42 +848,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllLeftConcatMasked", 
opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64) @@ -1556,42 +884,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, 
types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) @@ -1628,42 +920,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x4, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Int64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64) @@ -1700,54 +956,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightConcatMasked", 
opLen4(ssa.OpShiftRightConcatMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, 
"Float32x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64) @@ -1778,36 +992,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.SubMasked", opLen3(ssa.OpSubMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.SubMasked", opLen3(ssa.OpSubMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.SubMasked", opLen3(ssa.OpSubMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.SubMasked", opLen3(ssa.OpSubMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.SubMasked", opLen3(ssa.OpSubMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.SubMasked", opLen3(ssa.OpSubMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.SubMasked", opLen3(ssa.OpSubMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.SubMasked", opLen3(ssa.OpSubMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.SubMasked", opLen3(ssa.OpSubMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.SubMasked", opLen3(ssa.OpSubMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SubMasked", opLen3(ssa.OpSubMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.SubMasked", opLen3(ssa.OpSubMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.SubMasked", opLen3(ssa.OpSubMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.SubMasked", opLen3(ssa.OpSubMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.SubMasked", opLen3(ssa.OpSubMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.SubMasked", opLen3(ssa.OpSubMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.SubMasked", opLen3(ssa.OpSubMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.SubMasked", opLen3(ssa.OpSubMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.SubMasked", opLen3(ssa.OpSubMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.SubMasked", opLen3(ssa.OpSubMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.SubMasked", opLen3(ssa.OpSubMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.SubMasked", opLen3(ssa.OpSubMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.SubMasked", opLen3(ssa.OpSubMaskedUint16x16, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Uint16x32.SubMasked", opLen3(ssa.OpSubMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.SubMasked", opLen3(ssa.OpSubMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.SubMasked", opLen3(ssa.OpSubMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.SubMasked", opLen3(ssa.OpSubMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64) @@ -1834,18 +1018,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x8.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64) @@ -1856,24 +1028,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64) @@ -1898,18 +1058,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.XorMasked", opLen3(ssa.OpXorMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.XorMasked", opLen3(ssa.OpXorMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.XorMasked", opLen3(ssa.OpXorMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.XorMasked", opLen3(ssa.OpXorMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.XorMasked", opLen3(ssa.OpXorMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.XorMasked", opLen3(ssa.OpXorMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.XorMasked", opLen3(ssa.OpXorMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.XorMasked", opLen3(ssa.OpXorMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.XorMasked", opLen3(ssa.OpXorMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.XorMasked", opLen3(ssa.OpXorMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.XorMasked", opLen3(ssa.OpXorMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.XorMasked", opLen3(ssa.OpXorMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.blend", opLen3(ssa.OpblendInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.blend", opLen3(ssa.OpblendInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.blendMasked", opLen3(ssa.OpblendMaskedInt8x64, types.TypeVec512), sys.AMD64) diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go index 22decb9d7e6..4044addd8c1 100644 --- a/src/simd/_gen/simdgen/godefs.go +++ b/src/simd/_gen/simdgen/godefs.go @@ -11,6 +11,7 @@ import ( "slices" "strconv" "strings" + "unicode" "simd/_gen/unify" ) @@ -100,6 +101,11 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) if isMasked { o.Documentation += "\n//\n// This operation is applied selectively under a write mask." + if unicode.IsUpper([]rune(o.Go)[0]) { + trueVal := "true" + o.NoGenericOps = &trueVal + o.NoTypes = &trueVal + } } o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) 
diff --git a/src/simd/compare_test.go b/src/simd/compare_test.go index 7fd20cf5d79..f8526d27e98 100644 --- a/src/simd/compare_test.go +++ b/src/simd/compare_test.go @@ -15,44 +15,6 @@ import ( // from > and = var comparisonFixed bool = simd.HasAVX512() -func TestLessMasked(t *testing.T) { - if simd.HasAVX512() { - testFloat32x4CompareMasked(t, simd.Float32x4.LessMasked, lessSlice[float32]) - testFloat32x8CompareMasked(t, simd.Float32x8.LessMasked, lessSlice[float32]) - testFloat64x2CompareMasked(t, simd.Float64x2.LessMasked, lessSlice[float64]) - testFloat64x4CompareMasked(t, simd.Float64x4.LessMasked, lessSlice[float64]) - - testInt16x16CompareMasked(t, simd.Int16x16.LessMasked, lessSlice[int16]) - testInt16x8CompareMasked(t, simd.Int16x8.LessMasked, lessSlice[int16]) - testInt32x4CompareMasked(t, simd.Int32x4.LessMasked, lessSlice[int32]) - testInt32x8CompareMasked(t, simd.Int32x8.LessMasked, lessSlice[int32]) - testInt64x2CompareMasked(t, simd.Int64x2.LessMasked, lessSlice[int64]) - testInt64x4CompareMasked(t, simd.Int64x4.LessMasked, lessSlice[int64]) - testInt8x16CompareMasked(t, simd.Int8x16.LessMasked, lessSlice[int8]) - testInt8x32CompareMasked(t, simd.Int8x32.LessMasked, lessSlice[int8]) - - testUint16x16CompareMasked(t, simd.Uint16x16.LessMasked, lessSlice[uint16]) - testUint16x8CompareMasked(t, simd.Uint16x8.LessMasked, lessSlice[uint16]) - testUint32x4CompareMasked(t, simd.Uint32x4.LessMasked, lessSlice[uint32]) - testUint32x8CompareMasked(t, simd.Uint32x8.LessMasked, lessSlice[uint32]) - testUint64x2CompareMasked(t, simd.Uint64x2.LessMasked, lessSlice[uint64]) - testUint64x4CompareMasked(t, simd.Uint64x4.LessMasked, lessSlice[uint64]) - testUint8x16CompareMasked(t, simd.Uint8x16.LessMasked, lessSlice[uint8]) - testUint8x32CompareMasked(t, simd.Uint8x32.LessMasked, lessSlice[uint8]) - - testFloat32x16CompareMasked(t, simd.Float32x16.LessMasked, lessSlice[float32]) - testFloat64x8CompareMasked(t, simd.Float64x8.LessMasked, lessSlice[float64]) - testInt8x64CompareMasked(t, simd.Int8x64.LessMasked, lessSlice[int8]) - testInt16x32CompareMasked(t, simd.Int16x32.LessMasked, lessSlice[int16]) - testInt32x16CompareMasked(t, simd.Int32x16.LessMasked, lessSlice[int32]) - testInt64x8CompareMasked(t, simd.Int64x8.LessMasked, lessSlice[int64]) - testUint8x64CompareMasked(t, simd.Uint8x64.LessMasked, lessSlice[uint8]) - testUint16x32CompareMasked(t, simd.Uint16x32.LessMasked, lessSlice[uint16]) - testUint32x16CompareMasked(t, simd.Uint32x16.LessMasked, lessSlice[uint32]) - testUint64x8CompareMasked(t, simd.Uint64x8.LessMasked, lessSlice[uint64]) - } -} - func TestLess(t *testing.T) { testFloat32x4Compare(t, simd.Float32x4.Less, lessSlice[float32]) testFloat32x8Compare(t, simd.Float32x8.Less, lessSlice[float32]) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index d6fcd065bbb..76bbf738cb1 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -66,92 +66,6 @@ func (x Int64x4) Abs() Int64x4 // Asm: VPABSQ, CPU Feature: AVX512 func (x Int64x8) Abs() Int64x8 -/* AbsMasked */ - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSB, CPU Feature: AVX512 -func (x Int8x16) AbsMasked(mask Mask8x16) Int8x16 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSB, CPU Feature: AVX512 -func (x Int8x32) AbsMasked(mask Mask8x32) Int8x32 - -// AbsMasked computes the absolute value of each element. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSB, CPU Feature: AVX512 -func (x Int8x64) AbsMasked(mask Mask8x64) Int8x64 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSW, CPU Feature: AVX512 -func (x Int16x8) AbsMasked(mask Mask16x8) Int16x8 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSW, CPU Feature: AVX512 -func (x Int16x16) AbsMasked(mask Mask16x16) Int16x16 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSW, CPU Feature: AVX512 -func (x Int16x32) AbsMasked(mask Mask16x32) Int16x32 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSD, CPU Feature: AVX512 -func (x Int32x4) AbsMasked(mask Mask32x4) Int32x4 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSD, CPU Feature: AVX512 -func (x Int32x8) AbsMasked(mask Mask32x8) Int32x8 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSD, CPU Feature: AVX512 -func (x Int32x16) AbsMasked(mask Mask32x16) Int32x16 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSQ, CPU Feature: AVX512 -func (x Int64x2) AbsMasked(mask Mask64x2) Int64x2 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSQ, CPU Feature: AVX512 -func (x Int64x4) AbsMasked(mask Mask64x4) Int64x4 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSQ, CPU Feature: AVX512 -func (x Int64x8) AbsMasked(mask Mask64x8) Int64x8 - /* Add */ // Add adds corresponding elements of two vectors. @@ -321,29 +235,6 @@ func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8 // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16 -/* AddDotProdPairsSaturatedMasked */ - -// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x4) AddDotProdPairsSaturatedMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4 - -// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x8) AddDotProdPairsSaturatedMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8 - -// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x16) AddDotProdPairsSaturatedMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16 - /* AddDotProdQuadruple */ // AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z. 
@@ -361,29 +252,6 @@ func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8 // Asm: VPDPBUSD, CPU Feature: AVX512VNNI func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16 -/* AddDotProdQuadrupleMasked */ - -// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x16) AddDotProdQuadrupleMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4 - -// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x32) AddDotProdQuadrupleMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8 - -// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x64) AddDotProdQuadrupleMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16 - /* AddDotProdQuadrupleSaturated */ // AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. @@ -401,241 +269,6 @@ func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8 // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16 -/* AddDotProdQuadrupleSaturatedMasked */ - -// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x16) AddDotProdQuadrupleSaturatedMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4 - -// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x32) AddDotProdQuadrupleSaturatedMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8 - -// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x64) AddDotProdQuadrupleSaturatedMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16 - -/* AddMasked */ - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPS, CPU Feature: AVX512 -func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPS, CPU Feature: AVX512 -func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPS, CPU Feature: AVX512 -func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VADDPD, CPU Feature: AVX512 -func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPD, CPU Feature: AVX512 -func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPD, CPU Feature: AVX512 -func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* AddPairs */ // AddPairs horizontally adds adjacent pairs of elements. @@ -786,92 +419,6 @@ func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16 // Asm: VPADDUSW, CPU Feature: AVX512 func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32 -/* AddSaturatedMasked */ - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSB, CPU Feature: AVX512 -func (x Int8x16) AddSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSB, CPU Feature: AVX512 -func (x Int8x32) AddSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSB, CPU Feature: AVX512 -func (x Int8x64) AddSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSW, CPU Feature: AVX512 -func (x Int16x8) AddSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSW, CPU Feature: AVX512 -func (x Int16x16) AddSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSW, CPU Feature: AVX512 -func (x Int16x32) AddSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSB, CPU Feature: AVX512 -func (x Uint8x16) AddSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSB, CPU Feature: AVX512 -func (x Uint8x32) AddSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSB, CPU Feature: AVX512 -func (x Uint8x64) AddSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSW, CPU Feature: AVX512 -func (x Uint16x8) AddSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSW, CPU Feature: AVX512 -func (x Uint16x16) AddSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSW, CPU Feature: AVX512 -func (x Uint16x32) AddSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32 - /* AddSub */ // AddSub subtracts even elements and adds odd elements of two vectors. @@ -1016,92 +563,6 @@ func (x Uint64x4) And(y Uint64x4) Uint64x4 // Asm: VPANDQ, CPU Feature: AVX512 func (x Uint64x8) And(y Uint64x8) Uint64x8 -/* AndMasked */ - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* AndNot */ // AndNot performs a bitwise x &^ y. @@ -1224,92 +685,6 @@ func (x Uint64x4) AndNot(y Uint64x4) Uint64x4 // Asm: VPANDNQ, CPU Feature: AVX512 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 -/* AndNotMasked */ - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Average */ // Average computes the rounded average of corresponding elements. @@ -1342,50 +717,6 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16 // Asm: VPAVGW, CPU Feature: AVX512 func (x Uint16x32) Average(y Uint16x32) Uint16x32 -/* AverageMasked */ - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGB, CPU Feature: AVX512 -func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGB, CPU Feature: AVX512 -func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGB, CPU Feature: AVX512 -func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGW, CPU Feature: AVX512 -func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGW, CPU Feature: AVX512 -func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPAVGW, CPU Feature: AVX512 -func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32 - /* Broadcast128 */ // Broadcast128 copies element zero of its (128-bit) input to all elements of @@ -1448,88 +779,6 @@ func (x Uint32x4) Broadcast128() Uint32x4 // Asm: VPBROADCASTQ, CPU Feature: AVX2 func (x Uint64x2) Broadcast128() Uint64x2 -/* Broadcast128Masked */ - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2 - /* Broadcast256 */ // Broadcast256 copies element zero of its (128-bit) input to all elements of @@ -1592,88 +841,6 @@ func (x Uint32x4) Broadcast256() Uint32x8 // Asm: VPBROADCASTQ, CPU Feature: AVX2 func (x Uint64x2) Broadcast256() Uint64x4 -/* Broadcast256Masked */ - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSD, CPU Feature: AVX512 -func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4 - /* Broadcast512 */ // Broadcast512 copies element zero of its (128-bit) input to all elements of @@ -1736,88 +903,6 @@ func (x Uint32x4) Broadcast512() Uint32x16 // Asm: VPBROADCASTQ, CPU Feature: AVX512 func (x Uint64x2) Broadcast512() Uint64x8 -/* Broadcast512Masked */ - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSD, CPU Feature: AVX512 -func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8 - /* Ceil */ // Ceil rounds elements up to the nearest integer. 
@@ -1884,62 +969,6 @@ func (x Float64x4) CeilScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) CeilScaled(prec uint8) Float64x8 -/* CeilScaledMasked */ - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) CeilScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) CeilScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) CeilScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) CeilScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) CeilScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) CeilScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* CeilScaledResidue */ // CeilScaledResidue computes the difference after ceiling with specified precision. @@ -1984,62 +1013,6 @@ func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8 -/* CeilScaledResidueMasked */ - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) CeilScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) CeilScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) CeilScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) CeilScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) CeilScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) CeilScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* Compress */ // Compress performs a compression on vector x using mask by @@ -2239,29 +1212,6 @@ func (x Float32x8) ConvertToInt32() Int32x8 // Asm: VCVTTPS2DQ, CPU Feature: AVX512 func (x Float32x16) ConvertToInt32() Int32x16 -/* ConvertToInt32Masked */ - -// ConvertToInt32 converts element values to int32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTTPS2DQ, CPU Feature: AVX512 -func (x Float32x4) ConvertToInt32Masked(mask Mask32x4) Int32x4 - -// ConvertToInt32 converts element values to int32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTTPS2DQ, CPU Feature: AVX512 -func (x Float32x8) ConvertToInt32Masked(mask Mask32x8) Int32x8 - -// ConvertToInt32 converts element values to int32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTTPS2DQ, CPU Feature: AVX512 -func (x Float32x16) ConvertToInt32Masked(mask Mask32x16) Int32x16 - /* ConvertToUint32 */ // ConvertToUint32Masked converts element values to uint32. @@ -2279,29 +1229,6 @@ func (x Float32x8) ConvertToUint32() Uint32x8 // Asm: VCVTPS2UDQ, CPU Feature: AVX512 func (x Float32x16) ConvertToUint32() Uint32x16 -/* ConvertToUint32Masked */ - -// ConvertToUint32Masked converts element values to uint32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTPS2UDQ, CPU Feature: AVX512 -func (x Float32x4) ConvertToUint32Masked(mask Mask32x4) Uint32x4 - -// ConvertToUint32Masked converts element values to uint32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTPS2UDQ, CPU Feature: AVX512 -func (x Float32x8) ConvertToUint32Masked(mask Mask32x8) Uint32x8 - -// ConvertToUint32Masked converts element values to uint32. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTPS2UDQ, CPU Feature: AVX512 -func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16 - /* CopySign */ // CopySign returns the product of the first operand with -1, 0, or 1, @@ -2372,50 +1299,6 @@ func (x Float64x4) Div(y Float64x4) Float64x4 // Asm: VDIVPD, CPU Feature: AVX512 func (x Float64x8) Div(y Float64x8) Float64x8 -/* DivMasked */ - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPS, CPU Feature: AVX512 -func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPS, CPU Feature: AVX512 -func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPS, CPU Feature: AVX512 -func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPD, CPU Feature: AVX512 -func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPD, CPU Feature: AVX512 -func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPD, CPU Feature: AVX512 -func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8 - /* DotProdPairs */ // DotProdPairs multiplies the elements and add the pairs together, @@ -2436,32 +1319,6 @@ func (x Int16x16) DotProdPairs(y Int16x16) Int32x8 // Asm: VPMADDWD, CPU Feature: AVX512 func (x Int16x32) DotProdPairs(y Int16x32) Int32x16 -/* DotProdPairsMasked */ - -// DotProdPairsMasked multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x8) DotProdPairsMasked(y Int16x8, mask Mask16x8) Int32x4 - -// DotProdPairsMasked multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x16) DotProdPairsMasked(y Int16x16, mask Mask16x16) Int32x8 - -// DotProdPairsMasked multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x32) DotProdPairsMasked(y Int16x32, mask Mask16x32) Int32x16 - /* DotProdPairsSaturated */ // DotProdPairsSaturated multiplies the elements and add the pairs together with saturation, @@ -2482,32 +1339,6 @@ func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16 // Asm: VPMADDUBSW, CPU Feature: AVX512 func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32 -/* DotProdPairsSaturatedMasked */ - -// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x16) DotProdPairsSaturatedMasked(y Int8x16, mask Mask16x8) Int16x8 - -// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x32) DotProdPairsSaturatedMasked(y Int8x32, mask Mask16x16) Int16x16 - -// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x64) DotProdPairsSaturatedMasked(y Int8x64, mask Mask16x32) Int16x32 - /* Equal */ // Equal compares for equality. @@ -2660,218 +1491,6 @@ func (x Float64x4) Equal(y Float64x4) Mask64x4 // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) Equal(y Float64x8) Mask64x8 -/* EqualMasked */ - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. 
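For the masked comparisons being deleted in this hunk (EqualMasked and friends), here is a scalar sketch of "applied selectively under a write mask": lanes are compared only where the mask is set. Treating unselected lanes as false mirrors how AVX-512 masked compares behave, but that detail is an assumption, not something the doc text above states.

```go
// Scalar sketch of a masked equality compare over int32 lanes.
package main

import "fmt"

func equalMasked(x, y []int32, mask []bool) []bool {
	out := make([]bool, len(x))
	for i := range x {
		if mask[i] {
			out[i] = x[i] == y[i] // compared only under the write mask
		} // unselected lanes stay false (assumed, see note above)
	}
	return out
}

func main() {
	x := []int32{1, 2, 3, 4}
	y := []int32{1, 0, 3, 4}
	m := []bool{true, true, true, false}
	fmt.Println(equalMasked(x, y, m)) // [true false true false]
}
```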
-// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* Expand */ // Expand performs an expansion on a vector x whose elements are packed to lower parts. @@ -3120,62 +1739,6 @@ func (x Float64x4) FloorScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) FloorScaled(prec uint8) Float64x8 -/* FloorScaledMasked */ - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) FloorScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) FloorScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) FloorScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) FloorScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) FloorScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) FloorScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* FloorScaledResidue */ // FloorScaledResidue computes the difference after flooring with specified precision. @@ -3220,62 +1783,6 @@ func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8 -/* FloorScaledResidueMasked */ - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) FloorScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) FloorScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) FloorScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) FloorScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) FloorScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
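FloorScaled and FloorScaledResidue (and the masked forms removed here) round "with specified precision". Under the assumption, taken from the VRNDSCALE/VREDUCE encoding, that prec is the number of binary fraction bits kept, a scalar equivalent looks like the sketch below: values are floored to a multiple of 2^-prec, and the residue is what flooring discarded.

```go
// Scalar sketch of FloorScaled / FloorScaledResidue, assuming prec
// counts the binary fraction bits preserved.
package main

import (
	"fmt"
	"math"
)

func floorScaled(x float64, prec uint8) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return math.Floor(x*scale) / scale
}

func floorScaledResidue(x float64, prec uint8) float64 {
	return x - floorScaled(x, prec)
}

func main() {
	fmt.Println(floorScaled(3.14159, 3))        // 3.125, a multiple of 1/8
	fmt.Println(floorScaledResidue(3.14159, 3)) // ~0.01659
}
```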
-// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) FloorScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* GaloisFieldAffineTransform */ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): @@ -3343,85 +1850,6 @@ func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x3 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64 -/* GaloisFieldAffineTransformInverseMasked */ - -// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), -// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16 - -// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), -// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32 - -// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), -// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64 - -/* GaloisFieldAffineTransformMasked */ - -// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
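The GaloisFieldAffineTransform family in this hunk computes y * x + b over GF(2): an 8x8 bit matrix applied to each byte in its group of 8, plus a constant byte, where addition is XOR. The sketch below shows that arithmetic on a single byte. It does not reproduce the exact row/bit ordering of the 64-bit matrix operand that VGF2P8AFFINEQB fixes; it only illustrates the algebra the doc comments describe.

```go
// GF(2) affine transform of one byte: out = m*x + b, where m is an
// 8x8 bit matrix (m[i] is row i), "*" is a matrix-vector product over
// GF(2), and "+" is XOR.
package main

import (
	"fmt"
	"math/bits"
)

func affineGF2(m [8]uint8, x, b uint8) uint8 {
	var out uint8
	for i := 0; i < 8; i++ {
		if bits.OnesCount8(m[i]&x)%2 == 1 { // row i dotted with x, mod 2
			out |= 1 << i
		}
	}
	return out ^ b
}

func main() {
	identity := [8]uint8{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7}
	fmt.Printf("%#02x\n", affineGF2(identity, 0x5a, 0x00)) // 0x5a: identity matrix, zero bias
	fmt.Printf("%#02x\n", affineGF2(identity, 0x5a, 0xff)) // 0xa5: bias flips every bit
}
```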
-// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16 - -// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32 - -// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64 - /* GaloisFieldMul */ // GaloisFieldMul computes element-wise GF(2^8) multiplication with @@ -3442,32 +1870,6 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32 // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 -/* GaloisFieldMulMasked */ - -// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// This operation is applied selectively under a write mask. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// This operation is applied selectively under a write mask. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// This operation is applied selectively under a write mask. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64 - /* GetElem */ // GetElem retrieves a single constant-indexed element's value. @@ -3928,430 +2330,6 @@ func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 -/* GreaterEqualMasked */ - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// GreaterEqualMasked compares for greater than or equal. 
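GaloisFieldMul / GaloisFieldMulMasked, removed just above, multiply bytes in GF(2^8) with reduction polynomial x^8 + x^4 + x^3 + x + 1. A byte-at-a-time scalar reference follows; the vector instruction applies the same arithmetic independently to every lane.

```go
// GF(2^8) multiply with the AES reduction polynomial 0x11b.
package main

import "fmt"

func gfMul(a, b uint8) uint8 {
	var p uint8
	for i := 0; i < 8; i++ {
		if b&1 != 0 {
			p ^= a // add (XOR) the current shifted copy of a
		}
		carry := a & 0x80
		a <<= 1
		if carry != 0 {
			a ^= 0x1b // reduce by x^8 + x^4 + x^3 + x + 1
		}
		b >>= 1
	}
	return p
}

func main() {
	fmt.Printf("%#02x\n", gfMul(0x57, 0x83)) // 0xc1, the classic FIPS-197 worked example
}
```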
-// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - -/* GreaterMasked */ - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// GreaterMasked compares for greater than. 
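To make the removal concrete, here is the shape of a caller of one of the deleted methods, written against the Int32x4.GreaterMasked signature visible in this hunk. The import path "simd" is an assumption made for illustration; the package is experimental and everything outside the quoted signature is hypothetical.

```go
// Hypothetical caller of the removed masked compare.
package example

import "simd" // assumed import path for the experimental package

// greaterLanes reports, for lanes selected by m, whether x exceeds y,
// per the removed Int32x4.GreaterMasked signature shown above.
func greaterLanes(x, y simd.Int32x4, m simd.Mask32x4) simd.Mask32x4 {
	return x.GreaterMasked(y, m)
}
```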
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* IsNan */ // IsNan checks if elements are NaN. Use as x.IsNan(x). @@ -4384,50 +2362,6 @@ func (x Float64x4) IsNan(y Float64x4) Mask64x4 // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) IsNan(y Float64x8) Mask64x8 -/* IsNanMasked */ - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8 - /* Less */ // Less compares for less than. @@ -4572,430 +2506,6 @@ func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 -/* LessEqualMasked */ - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - -/* LessMasked */ - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// LessMasked compares for less than. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* Max */ // Max computes the maximum of corresponding elements. @@ -5148,218 +2658,6 @@ func (x Uint64x4) Max(y Uint64x4) Uint64x4 // Asm: VPMAXUQ, CPU Feature: AVX512 func (x Uint64x8) Max(y Uint64x8) Uint64x8 -/* MaxMasked */ - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPS, CPU Feature: AVX512 -func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPS, CPU Feature: AVX512 -func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPS, CPU Feature: AVX512 -func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPD, CPU Feature: AVX512 -func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPD, CPU Feature: AVX512 -func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPD, CPU Feature: AVX512 -func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSB, CPU Feature: AVX512 -func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSB, CPU Feature: AVX512 -func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSB, CPU Feature: AVX512 -func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPMAXSW, CPU Feature: AVX512 -func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSW, CPU Feature: AVX512 -func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSW, CPU Feature: AVX512 -func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSD, CPU Feature: AVX512 -func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSD, CPU Feature: AVX512 -func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSD, CPU Feature: AVX512 -func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSQ, CPU Feature: AVX512 -func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSQ, CPU Feature: AVX512 -func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSQ, CPU Feature: AVX512 -func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUB, CPU Feature: AVX512 -func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUB, CPU Feature: AVX512 -func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUB, CPU Feature: AVX512 -func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUW, CPU Feature: AVX512 -func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUW, CPU Feature: AVX512 -func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUW, CPU Feature: AVX512 -func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. 
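One more scalar sketch of "applied selectively under a write mask", this time for an arithmetic op (MaxMasked). Whether unselected lanes are zeroed or left untouched is not stated in the doc text above; zeroing is assumed here purely for illustration.

```go
// Scalar sketch of a masked per-lane maximum over int32 lanes.
package main

import "fmt"

func maxMasked(x, y []int32, mask []bool) []int32 {
	out := make([]int32, len(x))
	for i := range x {
		if mask[i] {
			if x[i] > y[i] {
				out[i] = x[i]
			} else {
				out[i] = y[i]
			}
		} // unselected lanes stay zero (assumption, see note above)
	}
	return out
}

func main() {
	fmt.Println(maxMasked([]int32{5, -1, 7}, []int32{3, 4, 9}, []bool{true, false, true})) // [5 0 9]
}
```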
-// -// Asm: VPMAXUD, CPU Feature: AVX512 -func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUD, CPU Feature: AVX512 -func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUD, CPU Feature: AVX512 -func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUQ, CPU Feature: AVX512 -func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUQ, CPU Feature: AVX512 -func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUQ, CPU Feature: AVX512 -func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Min */ // Min computes the minimum of corresponding elements. @@ -5512,218 +2810,6 @@ func (x Uint64x4) Min(y Uint64x4) Uint64x4 // Asm: VPMINUQ, CPU Feature: AVX512 func (x Uint64x8) Min(y Uint64x8) Uint64x8 -/* MinMasked */ - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPS, CPU Feature: AVX512 -func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPS, CPU Feature: AVX512 -func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPS, CPU Feature: AVX512 -func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPD, CPU Feature: AVX512 -func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPD, CPU Feature: AVX512 -func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPD, CPU Feature: AVX512 -func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSB, CPU Feature: AVX512 -func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSB, CPU Feature: AVX512 -func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32 - -// MinMasked computes the minimum of corresponding elements. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSB, CPU Feature: AVX512 -func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSW, CPU Feature: AVX512 -func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSW, CPU Feature: AVX512 -func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSW, CPU Feature: AVX512 -func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSD, CPU Feature: AVX512 -func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSD, CPU Feature: AVX512 -func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSD, CPU Feature: AVX512 -func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSQ, CPU Feature: AVX512 -func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSQ, CPU Feature: AVX512 -func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSQ, CPU Feature: AVX512 -func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUB, CPU Feature: AVX512 -func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUB, CPU Feature: AVX512 -func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUB, CPU Feature: AVX512 -func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUW, CPU Feature: AVX512 -func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUW, CPU Feature: AVX512 -func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MinMasked computes the minimum of corresponding elements. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUW, CPU Feature: AVX512 -func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUD, CPU Feature: AVX512 -func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUD, CPU Feature: AVX512 -func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUD, CPU Feature: AVX512 -func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUQ, CPU Feature: AVX512 -func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUQ, CPU Feature: AVX512 -func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUQ, CPU Feature: AVX512 -func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Mul */ // Mul multiplies corresponding elements of two vectors. @@ -5878,50 +2964,6 @@ func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMADD213PD, CPU Feature: AVX512 func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8 -/* MulAddMasked */ - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PS, CPU Feature: AVX512 -func (x Float32x4) MulAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PS, CPU Feature: AVX512 -func (x Float32x8) MulAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PS, CPU Feature: AVX512 -func (x Float32x16) MulAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PD, CPU Feature: AVX512 -func (x Float64x2) MulAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PD, CPU Feature: AVX512 -func (x Float64x4) MulAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PD, CPU Feature: AVX512 -func (x Float64x8) MulAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 - /* MulAddSub */ // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
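For orientation, every *Masked method removed in the hunks above shares the wording "applied selectively under a write mask": the element-wise operation only takes effect in lanes selected by the mask. As a rough scalar model of that behaviour (not the package's implementation), the sketch below mimics MulAddMasked on four float32 lanes with a hypothetical helper; it assumes, purely for illustration, that unselected lanes come out zeroed, and the scalar x*y + z does not carry the single-rounding guarantee of a real fused multiply-add.

package main

import "fmt"

// mulAddMasked4 models "fused (x * y) + z, applied selectively under a write
// mask" for four float32 lanes. Hypothetical helper, not part of the simd API;
// unlike the hardware FMA, the scalar x*y + z below rounds twice.
func mulAddMasked4(x, y, z [4]float32, mask [4]bool) [4]float32 {
	var r [4]float32
	for i := range r {
		if mask[i] { // lane selected by the write mask
			r[i] = x[i]*y[i] + z[i]
		} // unselected lanes stay zero (zeroing assumption, for illustration)
	}
	return r
}

func main() {
	x := [4]float32{1, 2, 3, 4}
	y := [4]float32{10, 10, 10, 10}
	z := [4]float32{0.5, 0.5, 0.5, 0.5}
	fmt.Println(mulAddMasked4(x, y, z, [4]bool{true, true, false, true}))
	// prints [10.5 20.5 0 40.5]
}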
@@ -5954,50 +2996,6 @@ func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4 // Asm: VFMADDSUB213PD, CPU Feature: AVX512 func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8 -/* MulAddSubMasked */ - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 -func (x Float32x4) MulAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 -func (x Float32x8) MulAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 -func (x Float32x16) MulAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 -func (x Float64x2) MulAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 -func (x Float64x4) MulAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 -func (x Float64x8) MulAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 - /* MulEvenWiden */ // MulEvenWiden multiplies even-indexed elements, widening the result. @@ -6056,220 +3054,6 @@ func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 // Asm: VPMULHUW, CPU Feature: AVX512 func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 -/* MulHighMasked */ - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHW, CPU Feature: AVX512 -func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHW, CPU Feature: AVX512 -func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHW, CPU Feature: AVX512 -func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHUW, CPU Feature: AVX512 -func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MulHighMasked multiplies elements and stores the high part of the result. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHUW, CPU Feature: AVX512 -func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHUW, CPU Feature: AVX512 -func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -/* MulMasked */ - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPS, CPU Feature: AVX512 -func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPS, CPU Feature: AVX512 -func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPS, CPU Feature: AVX512 -func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPD, CPU Feature: AVX512 -func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPD, CPU Feature: AVX512 -func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPD, CPU Feature: AVX512 -func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Int16x8) MulMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Int16x16) MulMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Int16x32) MulMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Int32x4) MulMasked(y Int32x4, mask Mask32x4) Int32x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Int32x8) MulMasked(y Int32x8, mask Mask32x8) Int32x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Int32x16) MulMasked(y Int32x16, mask Mask32x16) Int32x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Int64x2) MulMasked(y Int64x2, mask Mask64x2) Int64x2 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Int64x4) MulMasked(y Int64x4, mask Mask64x4) Int64x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Int64x8) MulMasked(y Int64x8, mask Mask64x8) Int64x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Uint16x8) MulMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Uint16x16) MulMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Uint16x32) MulMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Uint32x4) MulMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Uint32x8) MulMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Uint32x16) MulMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Uint64x2) MulMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Uint64x4) MulMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Uint64x8) MulMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* MulSubAdd */ // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. @@ -6302,50 +3086,6 @@ func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMSUBADD213PD, CPU Feature: AVX512 func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8 -/* MulSubAddMasked */ - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 -func (x Float32x4) MulSubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 -func (x Float32x8) MulSubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 -func (x Float32x16) MulSubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 -func (x Float64x2) MulSubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 -func (x Float64x4) MulSubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 -func (x Float64x8) MulSubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 - /* NotEqual */ // NotEqual compares for inequality. @@ -6418,218 +3158,6 @@ func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 -/* NotEqualMasked */ - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// NotEqualMasked compares for inequality. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* OnesCount */ // OnesCount counts the number of set bits in each element. @@ -6752,176 +3280,6 @@ func (x Uint64x4) OnesCount() Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Uint64x8) OnesCount() Uint64x8 -/* OnesCountMasked */ - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x16) OnesCountMasked(mask Mask8x16) Int8x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x32) OnesCountMasked(mask Mask8x32) Int8x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x64) OnesCountMasked(mask Mask8x64) Int8x64 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x8) OnesCountMasked(mask Mask16x8) Int16x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x16) OnesCountMasked(mask Mask16x16) Int16x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x32) OnesCountMasked(mask Mask16x32) Int16x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x4) OnesCountMasked(mask Mask32x4) Int32x4 - -// OnesCountMasked counts the number of set bits in each element. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x8) OnesCountMasked(mask Mask32x8) Int32x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x16) OnesCountMasked(mask Mask32x16) Int32x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x2) OnesCountMasked(mask Mask64x2) Int64x2 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x4) OnesCountMasked(mask Mask64x4) Int64x4 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x8) OnesCountMasked(mask Mask64x8) Int64x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x16) OnesCountMasked(mask Mask8x16) Uint8x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x32) OnesCountMasked(mask Mask8x32) Uint8x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x64) OnesCountMasked(mask Mask8x64) Uint8x64 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x8) OnesCountMasked(mask Mask16x8) Uint16x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x16) OnesCountMasked(mask Mask16x16) Uint16x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x32) OnesCountMasked(mask Mask16x32) Uint16x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x4) OnesCountMasked(mask Mask32x4) Uint32x4 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x8) OnesCountMasked(mask Mask32x8) Uint32x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x16) OnesCountMasked(mask Mask32x16) Uint32x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x2) OnesCountMasked(mask Mask64x2) Uint64x2 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x4) OnesCountMasked(mask Mask64x4) Uint64x4 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x8) OnesCountMasked(mask Mask64x8) Uint64x8 - /* Or */ // Or performs a bitwise OR operation between two vectors. @@ -7044,92 +3402,6 @@ func (x Uint64x4) Or(y Uint64x4) Uint64x4 // Asm: VPORQ, CPU Feature: AVX512 func (x Uint64x8) Or(y Uint64x8) Uint64x8 -/* OrMasked */ - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// OrMasked performs a bitwise OR operation between two vectors. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Permute */ // Permute performs a full permutation of vector x using indices: @@ -7542,526 +3814,6 @@ func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8 // Asm: VPERMI2Q, CPU Feature: AVX512 func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8 -/* Permute2Masked */ - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PS, CPU Feature: AVX512 -func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. 
-// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PS, CPU Feature: AVX512 -func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PS, CPU Feature: AVX512 -func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMI2PD, CPU Feature: AVX512 -func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PD, CPU Feature: AVX512 -func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PD, CPU Feature: AVX512 -func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. 
-// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8 - -/* PermuteMasked */ - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMPS, CPU Feature: AVX512 -func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMPS, CPU Feature: AVX512 -func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMPD, CPU Feature: AVX512 -func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMPD, CPU Feature: AVX512 -func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8 - /* Reciprocal */ // Reciprocal computes an approximate reciprocal of each element. 
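The Permute2Masked and PermuteMasked hunks above are the most involved removals here: their doc comments define the result as xy[indices[i]], where xy is x appended with y, only the bits needed to address xy are consulted in each index, and the write mask again selects which lanes are written. A minimal scalar sketch of that indexing rule, using a hypothetical helper and the same zeroing assumption as before (illustration only):

package main

import "fmt"

// permute2Masked4 models the Permute2Masked doc comment for four int32 lanes:
// xy is x appended with y (8 elements), each index only uses the bits needed
// to address xy (the low 3 bits here), and only mask-selected lanes are
// written. Hypothetical helper with a zeroing assumption; illustration only.
func permute2Masked4(x, y [4]int32, indices [4]uint32, mask [4]bool) [4]int32 {
	xy := append(x[:], y[:]...) // conceptual concatenation of x and y
	var r [4]int32
	for i := range r {
		if mask[i] {
			r[i] = xy[indices[i]&7] // &7 keeps only the needed index bits
		}
	}
	return r
}

func main() {
	x := [4]int32{10, 11, 12, 13}
	y := [4]int32{20, 21, 22, 23}
	fmt.Println(permute2Masked4(x, y, [4]uint32{7, 0, 13, 2}, [4]bool{true, true, true, false}))
	// prints [23 10 21 0]: xy[7]=23, xy[0]=10, xy[13&7]=xy[5]=21, last lane masked off
}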
@@ -8094,50 +3846,6 @@ func (x Float64x4) Reciprocal() Float64x4 // Asm: VRCP14PD, CPU Feature: AVX512 func (x Float64x8) Reciprocal() Float64x8 -/* ReciprocalMasked */ - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PS, CPU Feature: AVX512 -func (x Float32x4) ReciprocalMasked(mask Mask32x4) Float32x4 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PS, CPU Feature: AVX512 -func (x Float32x8) ReciprocalMasked(mask Mask32x8) Float32x8 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PS, CPU Feature: AVX512 -func (x Float32x16) ReciprocalMasked(mask Mask32x16) Float32x16 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PD, CPU Feature: AVX512 -func (x Float64x2) ReciprocalMasked(mask Mask64x2) Float64x2 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PD, CPU Feature: AVX512 -func (x Float64x4) ReciprocalMasked(mask Mask64x4) Float64x4 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PD, CPU Feature: AVX512 -func (x Float64x8) ReciprocalMasked(mask Mask64x8) Float64x8 - /* ReciprocalSqrt */ // ReciprocalSqrt computes an approximate reciprocal of the square root of each element. @@ -8170,50 +3878,6 @@ func (x Float64x4) ReciprocalSqrt() Float64x4 // Asm: VRSQRT14PD, CPU Feature: AVX512 func (x Float64x8) ReciprocalSqrt() Float64x8 -/* ReciprocalSqrtMasked */ - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512 -func (x Float32x4) ReciprocalSqrtMasked(mask Mask32x4) Float32x4 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512 -func (x Float32x8) ReciprocalSqrtMasked(mask Mask32x8) Float32x8 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512 -func (x Float32x16) ReciprocalSqrtMasked(mask Mask32x16) Float32x16 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512 -func (x Float64x2) ReciprocalSqrtMasked(mask Mask64x2) Float64x2 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512 -func (x Float64x4) ReciprocalSqrtMasked(mask Mask64x4) Float64x4 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VRSQRT14PD, CPU Feature: AVX512 -func (x Float64x8) ReciprocalSqrtMasked(mask Mask64x8) Float64x8 - /* RotateAllLeft */ // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. @@ -8300,116 +3964,6 @@ func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4 // Asm: VPROLQ, CPU Feature: AVX512 func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8 -/* RotateAllLeftMasked */ - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. 
-// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8 - /* RotateAllRight */ // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. @@ -8496,116 +4050,6 @@ func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4 // Asm: VPRORQ, CPU Feature: AVX512 func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8 -/* RotateAllRightMasked */ - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
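A scalar model of the RotateAllRight / RotateAllRightMasked semantics described above, for the 64-bit case: a single immediate count is applied to every lane, and (as assumed throughout these sketches) masked-off lanes are zeroed. This is illustrative only, not the package implementation.

import "math/bits"

func rotateAllRightMaskedUint64(x []uint64, shift uint8, mask []bool) []uint64 {
	out := make([]uint64, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = bits.RotateLeft64(v, -int(shift&63)) // rotate right; one count for the whole vector
		}
	}
	return out
}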
-// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8 - /* RotateLeft */ // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. @@ -8668,92 +4112,6 @@ func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4 // Asm: VPROLVQ, CPU Feature: AVX512 func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 -/* RotateLeftMasked */ - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* RotateRight */ // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. @@ -8816,92 +4174,6 @@ func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4 // Asm: VPRORVQ, CPU Feature: AVX512 func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 -/* RotateRightMasked */ - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. 
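Unlike the RotateAll* forms above, RotateLeft/RotateRight and their masked variants take a per-lane count from y. An illustrative scalar model of the 32-bit right rotation follows; zeroing of masked-off lanes is an assumption, not something the removed doc comments state.

import "math/bits"

func rotateRightMaskedUint32(x, y []uint32, mask []bool) []uint32 {
	out := make([]uint32, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = bits.RotateLeft32(v, -int(y[i]&31)) // per-lane count taken from y
		}
	}
	return out
}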
-// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* RoundToEven */ // RoundToEven rounds elements to the nearest integer. @@ -8968,62 +4240,6 @@ func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8 -/* RoundToEvenScaledMasked */ - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) RoundToEvenScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) RoundToEvenScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) RoundToEvenScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) RoundToEvenScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) RoundToEvenScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) RoundToEvenScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* RoundToEvenScaledResidue */ // RoundToEvenScaledResidue computes the difference after rounding with specified precision. @@ -9068,62 +4284,6 @@ func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8 -/* RoundToEvenScaledResidueMasked */ - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. 
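The "scaled" rounding described above keeps prec fractional bits (VRNDSCALE-style round half to even), and the Residue variant returns what rounding discarded (VREDUCE-style). A scalar sketch of that relationship, with masking omitted; this is the editor's model of the documented behavior, not the package code.

import "math"

// roundToEvenScaled keeps prec fractional bits, rounding half to even.
func roundToEvenScaled(x float64, prec uint8) float64 {
	s := math.Ldexp(1, int(prec)) // 2^prec
	return math.RoundToEven(x*s) / s
}

// roundToEvenScaledResidue is the part that rounding removed.
func roundToEvenScaledResidue(x float64, prec uint8) float64 {
	return x - roundToEvenScaled(x, prec)
}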
-// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* Scale */ // Scale multiplies elements by a power of 2. @@ -9156,50 +4316,6 @@ func (x Float64x4) Scale(y Float64x4) Float64x4 // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x8) Scale(y Float64x8) Float64x8 -/* ScaleMasked */ - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPS, CPU Feature: AVX512 -func (x Float32x4) ScaleMasked(y Float32x4, mask Mask32x4) Float32x4 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPS, CPU Feature: AVX512 -func (x Float32x8) ScaleMasked(y Float32x8, mask Mask32x8) Float32x8 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPS, CPU Feature: AVX512 -func (x Float32x16) ScaleMasked(y Float32x16, mask Mask32x16) Float32x16 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPD, CPU Feature: AVX512 -func (x Float64x2) ScaleMasked(y Float64x2, mask Mask64x2) Float64x2 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPD, CPU Feature: AVX512 -func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPD, CPU Feature: AVX512 -func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8 - /* SetElem */ // SetElem sets a single constant-indexed element's value. @@ -9714,316 +4830,6 @@ func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4 // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8 -/* ShiftAllLeftConcatMasked */ - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllLeftConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
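A scalar model of the concatenating left shift described above, for the 32-bit case: x and y act as the upper and lower halves of a 64-bit value, which is shifted left, keeping the upper half, so the top bits of y slide into the vacated low bits of x. Only the low 5 bits of the count are used. Illustrative only; masking omitted.

func shiftAllLeftConcat32(x, y []uint32, shift uint8) []uint32 {
	s := uint(shift) & 31 // only the low 5 bits of the count are used
	out := make([]uint32, len(x))
	for i := range x {
		w := uint64(x[i])<<32 | uint64(y[i]) // concatenate x (upper) and y (lower)
		out[i] = uint32(w << s >> 32)        // shift left, keep the upper 32 bits
	}
	return out
}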
-// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllLeftConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllLeftConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllLeftConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllLeftConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllLeftConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllLeftConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllLeftConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllLeftConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllLeftConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllLeftConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllLeftConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllLeftConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllLeftConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllLeftConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllLeftConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllLeftConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllLeftConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftAllLeftMasked */ - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8 - /* ShiftAllRight */ // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. @@ -10262,316 +5068,6 @@ func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4 // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8 -/* ShiftAllRightConcatMasked */ - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllRightConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllRightConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllRightConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllRightConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllRightConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllRightConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllRightConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllRightConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllRightConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllRightConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllRightConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllRightConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllRightConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllRightConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllRightConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
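The ShiftAllRightConcat* family mirrors the left-concat form: x is shifted right and the low bits of y fill the vacated high bits, i.e. the lower half of the concatenation (y:x) shifted right. An illustrative 32-bit scalar model, with masking omitted:

func shiftAllRightConcat32(x, y []uint32, shift uint8) []uint32 {
	s := uint(shift) & 31 // only the low 5 bits of the count are used
	out := make([]uint32, len(x))
	for i := range x {
		w := uint64(y[i])<<32 | uint64(x[i]) // concatenate y (upper) and x (lower)
		out[i] = uint32(w >> s)              // shift right, keep the lower 32 bits
	}
	return out
}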
-// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllRightConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllRightConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllRightConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftAllRightMasked */ - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAW, CPU Feature: AVX512 -func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAW, CPU Feature: AVX512 -func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAW, CPU Feature: AVX512 -func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAD, CPU Feature: AVX512 -func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAD, CPU Feature: AVX512 -func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAD, CPU Feature: AVX512 -func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRAQ, CPU Feature: AVX512 -func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAQ, CPU Feature: AVX512 -func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAQ, CPU Feature: AVX512 -func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLW, CPU Feature: AVX512 -func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLW, CPU Feature: AVX512 -func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLW, CPU Feature: AVX512 -func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLD, CPU Feature: AVX512 -func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLD, CPU Feature: AVX512 -func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLD, CPU Feature: AVX512 -func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. 
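The signed and unsigned ShiftAllRightMasked entries above differ only in what fills the vacated high bits: the sign bit (VPSRA*) versus zeros (VPSRL*). Go's shift operators already make that distinction, so a scalar model is direct; as elsewhere, zeroing of masked-off lanes is an assumption.

func shiftAllRightMaskedInt32(x []int32, y uint64, mask []bool) []int32 {
	out := make([]int32, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = v >> y // arithmetic shift: the sign bit is replicated
		}
	}
	return out
}

func shiftAllRightMaskedUint32(x []uint32, y uint64, mask []bool) []uint32 {
	out := make([]uint32, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = v >> y // logical shift: zero fill
		}
	}
	return out
}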
-// -// Asm: VPSRLQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8 - /* ShiftLeft */ // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. @@ -10774,280 +5270,6 @@ func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4 // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8 -/* ShiftLeftConcatMasked */ - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftLeftConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftLeftConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftLeftConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftLeftConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftLeftConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftLeftConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftLeftConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftLeftConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftLeftConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftLeftConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftLeftConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftLeftConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftLeftConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftLeftConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftLeftConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftLeftConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftLeftConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftLeftConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftLeftMasked */ - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* ShiftRight */ // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. @@ -11250,280 +5472,6 @@ func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4 // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8 -/* ShiftRightConcatMasked */ - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftRightConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftRightConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftRightConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftRightConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftRightConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftRightConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftRightConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftRightConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftRightConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftRightConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftRightConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftRightConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftRightConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftRightConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftRightConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftRightConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftRightConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftRightConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftRightMasked */ - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVW, CPU Feature: AVX512 -func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRAVW, CPU Feature: AVX512 -func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVW, CPU Feature: AVX512 -func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVD, CPU Feature: AVX512 -func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVD, CPU Feature: AVX512 -func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVD, CPU Feature: AVX512 -func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVQ, CPU Feature: AVX512 -func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVQ, CPU Feature: AVX512 -func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVQ, CPU Feature: AVX512 -func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVW, CPU Feature: AVX512 -func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVW, CPU Feature: AVX512 -func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRLVW, CPU Feature: AVX512 -func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVD, CPU Feature: AVX512 -func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVD, CPU Feature: AVX512 -func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVD, CPU Feature: AVX512 -func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Sqrt */ // Sqrt computes the square root of each element. @@ -11556,50 +5504,6 @@ func (x Float64x4) Sqrt() Float64x4 // Asm: VSQRTPD, CPU Feature: AVX512 func (x Float64x8) Sqrt() Float64x8 -/* SqrtMasked */ - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPS, CPU Feature: AVX512 -func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPS, CPU Feature: AVX512 -func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPS, CPU Feature: AVX512 -func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPD, CPU Feature: AVX512 -func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPD, CPU Feature: AVX512 -func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4 - -// SqrtMasked computes the square root of each element. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPD, CPU Feature: AVX512 -func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8 - /* Sub */ // Sub subtracts corresponding elements of two vectors. @@ -11752,218 +5656,6 @@ func (x Uint64x4) Sub(y Uint64x4) Uint64x4 // Asm: VPSUBQ, CPU Feature: AVX512 func (x Uint64x8) Sub(y Uint64x8) Uint64x8 -/* SubMasked */ - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPS, CPU Feature: AVX512 -func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPS, CPU Feature: AVX512 -func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPS, CPU Feature: AVX512 -func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPD, CPU Feature: AVX512 -func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPD, CPU Feature: AVX512 -func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPD, CPU Feature: AVX512 -func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4 - -// SubMasked subtracts corresponding elements of two vectors. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* SubPairs */ // SubPairs horizontally subtracts adjacent pairs of elements. @@ -12114,92 +5806,6 @@ func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16 // Asm: VPSUBUSW, CPU Feature: AVX512 func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32 -/* SubSaturatedMasked */ - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSB, CPU Feature: AVX512 -func (x Int8x16) SubSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSB, CPU Feature: AVX512 -func (x Int8x32) SubSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSB, CPU Feature: AVX512 -func (x Int8x64) SubSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSW, CPU Feature: AVX512 -func (x Int16x8) SubSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSW, CPU Feature: AVX512 -func (x Int16x16) SubSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSW, CPU Feature: AVX512 -func (x Int16x32) SubSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSB, CPU Feature: AVX512 -func (x Uint8x16) SubSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSB, CPU Feature: AVX512 -func (x Uint8x32) SubSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSB, CPU Feature: AVX512 -func (x Uint8x64) SubSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSUBUSW, CPU Feature: AVX512 -func (x Uint16x8) SubSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSW, CPU Feature: AVX512 -func (x Uint16x16) SubSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSW, CPU Feature: AVX512 -func (x Uint16x32) SubSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32 - /* Trunc */ // Trunc truncates elements towards zero. @@ -12266,62 +5872,6 @@ func (x Float64x4) TruncScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) TruncScaled(prec uint8) Float64x8 -/* TruncScaledMasked */ - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) TruncScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) TruncScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) TruncScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) TruncScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) TruncScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) TruncScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* TruncScaledResidue */ // TruncScaledResidue computes the difference after truncating with specified precision. 
@@ -12366,62 +5916,6 @@ func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8 -/* TruncScaledResidueMasked */ - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) TruncScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) TruncScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) TruncScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) TruncScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) TruncScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) TruncScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* Xor */ // Xor performs a bitwise XOR operation between two vectors. @@ -12544,92 +6038,6 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4 // Asm: VPXORQ, CPU Feature: AVX512 func (x Uint64x8) Xor(y Uint64x8) Uint64x8 -/* XorMasked */ - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* blend */ // blend blends two vectors based on mask values, choosing either diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index 3faeeaccfde..c88fe4b9fef 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -43,7 +43,7 @@ func TestType(t *testing.T) { return } v.z = maskT(simd.Mask32x4FromBits(0b0011)) - *v.y = v.y.AddMasked(v.x, simd.Mask32x4(v.z)) + *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z)) got := [4]int32{} v.y.Store(&got) @@ -121,7 +121,7 @@ func TestMaskConversion(t *testing.T) { } x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) mask := simd.Int32x4{}.Sub(x).ToMask() - y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).AddMasked(x, mask) + y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) want := [4]int32{6, 0, 10, 0} got := make([]int32, 4) y.StoreSlice(got) @@ -327,7 +327,7 @@ func TestBitMaskLoad(t *testing.T) { results := [2]int64{} want := [2]int64{0, 6} m := simd.LoadMask64x2FromBits(&bits) - simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results) + simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) @@ -359,7 +359,7 @@ func TestBitMaskFromBits(t *testing.T) { results := [2]int64{} want := [2]int64{0, 6} m := simd.Mask64x2FromBits(0b10) - simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results) + simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
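
The test hunks above capture the API shift this change relies on: the fused XMasked(y, mask) methods are deleted, and masking is instead expressed by chaining .Masked(mask) onto the unmasked operation. Below is a minimal sketch of that pattern, assuming the import path "simd" implied by src/simd/simd_test.go, the Add, Masked, Mask32x4FromBits, LoadInt32x4Slice, and StoreSlice calls used in the updated tests, and an amd64 build with the experimental simd package enabled; it is illustrative only, not part of the CL.

package main

import (
	"fmt"
	"simd"
)

func main() {
	x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	y := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})

	// Bit i of the argument selects lane i, the convention the bit-mask
	// tests above rely on (assumed to carry over from Mask64x2FromBits).
	m := simd.Mask32x4FromBits(0b0101)

	// Previously spelled x.AddMasked(y, m). With this change the unmasked
	// Add is computed and the write mask is applied afterwards; lanes whose
	// mask bit is clear come out zeroed, matching the updated tests.
	z := x.Add(y).Masked(m)

	got := make([]int32, 4)
	z.StoreSlice(got)
	fmt.Println(got) // [6 0 10 0]
}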