[dev.simd] cmd/compile, simd: add Compress

This CL is generated by CL 687975.

Change-Id: I21707d108773cc6d8e6f07aaed60e756faa1e6cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/687995
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-07-14 20:29:46 +00:00
parent 17baae72db
commit 6d10680141
10 changed files with 2177 additions and 221 deletions

View file

@ -600,6 +600,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VRSQRT14PDMasked128,
ssa.OpAMD64VRSQRT14PDMasked256,
ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
ssa.OpAMD64VCOMPRESSPDMasked128,
ssa.OpAMD64VCOMPRESSPDMasked256,
ssa.OpAMD64VCOMPRESSPDMasked512,
ssa.OpAMD64VPCOMPRESSBMasked128,
ssa.OpAMD64VPCOMPRESSBMasked256,
ssa.OpAMD64VPCOMPRESSBMasked512,
ssa.OpAMD64VPCOMPRESSWMasked128,
ssa.OpAMD64VPCOMPRESSWMasked256,
ssa.OpAMD64VPCOMPRESSWMasked512,
ssa.OpAMD64VPCOMPRESSDMasked128,
ssa.OpAMD64VPCOMPRESSDMasked256,
ssa.OpAMD64VPCOMPRESSDMasked512,
ssa.OpAMD64VPCOMPRESSQMasked128,
ssa.OpAMD64VPCOMPRESSQMasked256,
ssa.OpAMD64VPCOMPRESSQMasked512,
ssa.OpAMD64VPOPCNTBMasked128,
ssa.OpAMD64VPOPCNTBMasked256,
ssa.OpAMD64VPOPCNTBMasked512,
@ -1078,6 +1096,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VRNDSCALEPDMasked128,
ssa.OpAMD64VRNDSCALEPDMasked256,
ssa.OpAMD64VRNDSCALEPDMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
ssa.OpAMD64VCOMPRESSPDMasked128,
ssa.OpAMD64VCOMPRESSPDMasked256,
ssa.OpAMD64VCOMPRESSPDMasked512,
ssa.OpAMD64VPCOMPRESSBMasked128,
ssa.OpAMD64VPCOMPRESSBMasked256,
ssa.OpAMD64VPCOMPRESSBMasked512,
ssa.OpAMD64VPCOMPRESSWMasked128,
ssa.OpAMD64VPCOMPRESSWMasked256,
ssa.OpAMD64VPCOMPRESSWMasked512,
ssa.OpAMD64VPCOMPRESSDMasked128,
ssa.OpAMD64VPCOMPRESSDMasked256,
ssa.OpAMD64VPCOMPRESSDMasked512,
ssa.OpAMD64VPCOMPRESSQMasked128,
ssa.OpAMD64VPCOMPRESSQMasked256,
ssa.OpAMD64VPCOMPRESSQMasked512,
ssa.OpAMD64VREDUCEPSMasked128,
ssa.OpAMD64VREDUCEPSMasked256,
ssa.OpAMD64VREDUCEPSMasked512,

View file

@ -204,6 +204,36 @@
(CeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(CeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(CompressFloat64x2 x mask) => (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CompressFloat64x4 x mask) => (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(CompressFloat64x8 x mask) => (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(CompressInt8x16 x mask) => (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(CompressInt8x32 x mask) => (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(CompressInt8x64 x mask) => (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(CompressInt16x8 x mask) => (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(CompressInt16x16 x mask) => (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(CompressInt16x32 x mask) => (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(CompressInt32x4 x mask) => (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(CompressInt32x8 x mask) => (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(CompressInt32x16 x mask) => (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(CompressInt64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CompressInt64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(CompressInt64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(CompressUint8x16 x mask) => (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(CompressUint8x32 x mask) => (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(CompressUint8x64 x mask) => (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(CompressUint16x8 x mask) => (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(CompressUint16x16 x mask) => (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(CompressUint16x32 x mask) => (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(CompressUint32x4 x mask) => (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(CompressUint32x8 x mask) => (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(CompressUint32x16 x mask) => (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)

View file

@ -9,6 +9,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PSMasked512", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VRSQRT14PS512", argLength: 1, reg: w11, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VRSQRT14PSMasked512", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VCOMPRESSPSMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VDIVPS512", argLength: 2, reg: w21, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VDIVPSMasked512", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VFMADD213PS512", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true},
@ -36,6 +37,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PSMasked128", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRSQRTPS128", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRSQRT14PSMasked128", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VCOMPRESSPSMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDIVPS128", argLength: 2, reg: v21, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDIVPSMasked128", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VFMADD213PS128", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
@ -65,6 +67,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PSMasked256", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRSQRTPS256", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRSQRT14PSMasked256", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCOMPRESSPSMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VDIVPS256", argLength: 2, reg: v21, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VDIVPSMasked256", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VFMADD213PS256", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
@ -94,6 +97,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PDMasked128", argLength: 2, reg: wkw, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRSQRT14PD128", argLength: 1, reg: w11, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRSQRT14PDMasked128", argLength: 2, reg: wkw, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VCOMPRESSPDMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDIVPD128", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDIVPDMasked128", argLength: 3, reg: w2kw, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
@ -123,6 +127,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PDMasked256", argLength: 2, reg: wkw, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRSQRT14PD256", argLength: 1, reg: w11, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRSQRT14PDMasked256", argLength: 2, reg: wkw, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCOMPRESSPDMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VDIVPD256", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VDIVPDMasked256", argLength: 3, reg: w2kw, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
@ -151,6 +156,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PDMasked512", argLength: 2, reg: wkw, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VRSQRT14PD512", argLength: 1, reg: w11, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VRSQRT14PDMasked512", argLength: 2, reg: wkw, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VCOMPRESSPDMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VDIVPD512", argLength: 2, reg: w21, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VDIVPDMasked512", argLength: 3, reg: w2kw, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
@ -175,6 +181,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPABSWMasked256", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPADDW256", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDWMasked256", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQW256", argLength: 2, reg: v21, asm: "VPCMPEQW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPGTW256", argLength: 2, reg: v21, asm: "VPCMPGTW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXSW256", argLength: 2, reg: v21, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false},
@ -216,6 +223,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPABSWMasked512", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDWMasked512", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSW512", argLength: 2, reg: w21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSWMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSW512", argLength: 2, reg: w21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
@ -250,6 +258,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPABSWMasked128", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPADDW128", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDWMasked128", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQW128", argLength: 2, reg: v21, asm: "VPCMPEQW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPGTW128", argLength: 2, reg: v21, asm: "VPCMPGTW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSW128", argLength: 2, reg: v21, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false},
@ -295,6 +304,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPANDDMasked512", argLength: 3, reg: w2kw, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPANDND512", argLength: 2, reg: w21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPANDNDMasked512", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSDMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSD512", argLength: 2, reg: w21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSD512", argLength: 2, reg: w21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
@ -339,6 +349,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDDMasked128", argLength: 3, reg: w2kw, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPANDDMasked128", argLength: 3, reg: w2kw, asm: "VPANDD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPANDNDMasked128", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCOMPRESSDMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQD128", argLength: 2, reg: v21, asm: "VPCMPEQD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPGTD128", argLength: 2, reg: v21, asm: "VPCMPGTD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSD128", argLength: 2, reg: v21, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false},
@ -387,6 +398,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDDMasked256", argLength: 3, reg: w2kw, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPANDDMasked256", argLength: 3, reg: w2kw, asm: "VPANDD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPANDNDMasked256", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCOMPRESSDMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQD256", argLength: 2, reg: v21, asm: "VPCMPEQD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPGTD256", argLength: 2, reg: v21, asm: "VPCMPGTD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXSD256", argLength: 2, reg: v21, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false},
@ -435,6 +447,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDQMasked128", argLength: 3, reg: w2kw, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPANDQMasked128", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPANDNQMasked128", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCOMPRESSQMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQQ128", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPGTQ128", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSQ128", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false},
@ -472,6 +485,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDQMasked256", argLength: 3, reg: w2kw, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPANDQMasked256", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPANDNQMasked256", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCOMPRESSQMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQQ256", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPGTQ256", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXSQ256", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false},
@ -511,6 +525,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPANDQMasked512", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPANDNQ512", argLength: 2, reg: w21, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPANDNQMasked512", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSQMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSQ512", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSQ512", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false},
@ -549,6 +564,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDBMasked128", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPAND128", argLength: 2, reg: v21, asm: "VPAND", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPANDN128", argLength: 2, reg: v21, asm: "VPANDN", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCOMPRESSBMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQB128", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPGTB128", argLength: 2, reg: v21, asm: "VPCMPGTB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSB128", argLength: 2, reg: v21, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false},
@ -572,6 +588,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDBMasked256", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPAND256", argLength: 2, reg: v21, asm: "VPAND", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPANDN256", argLength: 2, reg: v21, asm: "VPANDN", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCOMPRESSBMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQB256", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPGTB256", argLength: 2, reg: v21, asm: "VPCMPGTB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXSB256", argLength: 2, reg: v21, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false},
@ -593,6 +610,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPABSBMasked512", argLength: 2, reg: wkw, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPADDB512", argLength: 2, reg: w21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDBMasked512", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSBMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSB512", argLength: 2, reg: w21, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSBMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSB512", argLength: 2, reg: w21, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false},
@ -657,12 +675,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMAXUDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUD512", argLength: 2, reg: w21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUDMasked512", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPERMPS512", argLength: 2, reg: w21, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMD512", argLength: 2, reg: w21, asm: "VPERMD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMI2D512", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMPS512", argLength: 2, reg: w21, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMI2PS512", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2DMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2D512", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2PSMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2DMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMPSMasked512", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMDMasked512", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLD512", argLength: 2, reg: wfpw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false},
@ -687,12 +705,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PS256", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMPSMasked256", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMDMasked256", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
@ -706,8 +724,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPERMI2PD128", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPERMI2Q128", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPERMI2QMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPERMI2PDMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPERMI2QMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
@ -719,12 +737,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMPD256", argLength: 2, reg: w21, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PDMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2QMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMPDMasked256", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMQMasked256", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMPDMasked256", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
@ -741,8 +759,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPERMI2PD512", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2QMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2PDMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},

View file

@ -9,6 +9,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat32x16", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat32x16", argLength: 2, commutative: false},
{name: "CompressFloat32x16", argLength: 2, commutative: false},
{name: "DivFloat32x16", argLength: 2, commutative: false},
{name: "DivMaskedFloat32x16", argLength: 3, commutative: false},
{name: "EqualFloat32x16", argLength: 2, commutative: true},
@ -51,6 +52,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat32x4", argLength: 2, commutative: false},
{name: "CeilFloat32x4", argLength: 1, commutative: false},
{name: "CompressFloat32x4", argLength: 2, commutative: false},
{name: "DivFloat32x4", argLength: 2, commutative: false},
{name: "DivMaskedFloat32x4", argLength: 3, commutative: false},
{name: "DotProdBroadcastFloat32x4", argLength: 2, commutative: true},
@ -99,6 +101,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat32x8", argLength: 2, commutative: false},
{name: "CeilFloat32x8", argLength: 1, commutative: false},
{name: "CompressFloat32x8", argLength: 2, commutative: false},
{name: "DivFloat32x8", argLength: 2, commutative: false},
{name: "DivMaskedFloat32x8", argLength: 3, commutative: false},
{name: "DotProdBroadcastFloat32x8", argLength: 2, commutative: true},
@ -147,6 +150,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat64x2", argLength: 2, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},
{name: "CompressFloat64x2", argLength: 2, commutative: false},
{name: "DivFloat64x2", argLength: 2, commutative: false},
{name: "DivMaskedFloat64x2", argLength: 3, commutative: false},
{name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true},
@ -195,6 +199,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat64x4", argLength: 2, commutative: false},
{name: "CeilFloat64x4", argLength: 1, commutative: false},
{name: "CompressFloat64x4", argLength: 2, commutative: false},
{name: "DivFloat64x4", argLength: 2, commutative: false},
{name: "DivMaskedFloat64x4", argLength: 3, commutative: false},
{name: "EqualFloat64x4", argLength: 2, commutative: true},
@ -240,6 +245,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat64x8", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat64x8", argLength: 2, commutative: false},
{name: "CompressFloat64x8", argLength: 2, commutative: false},
{name: "DivFloat64x8", argLength: 2, commutative: false},
{name: "DivMaskedFloat64x8", argLength: 3, commutative: false},
{name: "EqualFloat64x8", argLength: 2, commutative: true},
@ -280,6 +286,7 @@ func simdGenericOps() []opData {
{name: "AddMaskedInt16x16", argLength: 3, commutative: true},
{name: "AndInt16x16", argLength: 2, commutative: true},
{name: "AndNotInt16x16", argLength: 2, commutative: false},
{name: "CompressInt16x16", argLength: 2, commutative: false},
{name: "EqualInt16x16", argLength: 2, commutative: true},
{name: "EqualMaskedInt16x16", argLength: 3, commutative: true},
{name: "GreaterInt16x16", argLength: 2, commutative: false},
@ -333,6 +340,7 @@ func simdGenericOps() []opData {
{name: "AbsoluteMaskedInt16x32", argLength: 2, commutative: false},
{name: "AddInt16x32", argLength: 2, commutative: true},
{name: "AddMaskedInt16x32", argLength: 3, commutative: true},
{name: "CompressInt16x32", argLength: 2, commutative: false},
{name: "EqualInt16x32", argLength: 2, commutative: true},
{name: "EqualMaskedInt16x32", argLength: 3, commutative: true},
{name: "GreaterInt16x32", argLength: 2, commutative: false},
@ -381,6 +389,7 @@ func simdGenericOps() []opData {
{name: "AddMaskedInt16x8", argLength: 3, commutative: true},
{name: "AndInt16x8", argLength: 2, commutative: true},
{name: "AndNotInt16x8", argLength: 2, commutative: false},
{name: "CompressInt16x8", argLength: 2, commutative: false},
{name: "EqualInt16x8", argLength: 2, commutative: true},
{name: "EqualMaskedInt16x8", argLength: 3, commutative: true},
{name: "GreaterInt16x8", argLength: 2, commutative: false},
@ -438,6 +447,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedInt32x16", argLength: 3, commutative: true},
{name: "AndNotInt32x16", argLength: 2, commutative: false},
{name: "AndNotMaskedInt32x16", argLength: 3, commutative: false},
{name: "CompressInt32x16", argLength: 2, commutative: false},
{name: "EqualInt32x16", argLength: 2, commutative: true},
{name: "EqualMaskedInt32x16", argLength: 3, commutative: true},
{name: "GreaterInt32x16", argLength: 2, commutative: false},
@ -496,6 +506,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedInt32x4", argLength: 3, commutative: true},
{name: "AndNotInt32x4", argLength: 2, commutative: false},
{name: "AndNotMaskedInt32x4", argLength: 3, commutative: false},
{name: "CompressInt32x4", argLength: 2, commutative: false},
{name: "EqualInt32x4", argLength: 2, commutative: true},
{name: "EqualMaskedInt32x4", argLength: 3, commutative: true},
{name: "GreaterInt32x4", argLength: 2, commutative: false},
@ -558,6 +569,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedInt32x8", argLength: 3, commutative: true},
{name: "AndNotInt32x8", argLength: 2, commutative: false},
{name: "AndNotMaskedInt32x8", argLength: 3, commutative: false},
{name: "CompressInt32x8", argLength: 2, commutative: false},
{name: "EqualInt32x8", argLength: 2, commutative: true},
{name: "EqualMaskedInt32x8", argLength: 3, commutative: true},
{name: "GreaterInt32x8", argLength: 2, commutative: false},
@ -620,6 +632,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedInt64x2", argLength: 3, commutative: true},
{name: "AndNotInt64x2", argLength: 2, commutative: false},
{name: "AndNotMaskedInt64x2", argLength: 3, commutative: false},
{name: "CompressInt64x2", argLength: 2, commutative: false},
{name: "EqualInt64x2", argLength: 2, commutative: true},
{name: "EqualMaskedInt64x2", argLength: 3, commutative: true},
{name: "GreaterInt64x2", argLength: 2, commutative: false},
@ -672,6 +685,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedInt64x4", argLength: 3, commutative: true},
{name: "AndNotInt64x4", argLength: 2, commutative: false},
{name: "AndNotMaskedInt64x4", argLength: 3, commutative: false},
{name: "CompressInt64x4", argLength: 2, commutative: false},
{name: "EqualInt64x4", argLength: 2, commutative: true},
{name: "EqualMaskedInt64x4", argLength: 3, commutative: true},
{name: "GreaterInt64x4", argLength: 2, commutative: false},
@ -724,6 +738,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedInt64x8", argLength: 3, commutative: true},
{name: "AndNotInt64x8", argLength: 2, commutative: false},
{name: "AndNotMaskedInt64x8", argLength: 3, commutative: false},
{name: "CompressInt64x8", argLength: 2, commutative: false},
{name: "EqualInt64x8", argLength: 2, commutative: true},
{name: "EqualMaskedInt64x8", argLength: 3, commutative: true},
{name: "GreaterInt64x8", argLength: 2, commutative: false},
@ -774,6 +789,7 @@ func simdGenericOps() []opData {
{name: "AddMaskedInt8x16", argLength: 3, commutative: true},
{name: "AndInt8x16", argLength: 2, commutative: true},
{name: "AndNotInt8x16", argLength: 2, commutative: false},
{name: "CompressInt8x16", argLength: 2, commutative: false},
{name: "EqualInt8x16", argLength: 2, commutative: true},
{name: "EqualMaskedInt8x16", argLength: 3, commutative: true},
{name: "GreaterInt8x16", argLength: 2, commutative: false},
@ -807,6 +823,7 @@ func simdGenericOps() []opData {
{name: "AddMaskedInt8x32", argLength: 3, commutative: true},
{name: "AndInt8x32", argLength: 2, commutative: true},
{name: "AndNotInt8x32", argLength: 2, commutative: false},
{name: "CompressInt8x32", argLength: 2, commutative: false},
{name: "EqualInt8x32", argLength: 2, commutative: true},
{name: "EqualMaskedInt8x32", argLength: 3, commutative: true},
{name: "GreaterInt8x32", argLength: 2, commutative: false},
@ -838,6 +855,7 @@ func simdGenericOps() []opData {
{name: "AbsoluteMaskedInt8x64", argLength: 2, commutative: false},
{name: "AddInt8x64", argLength: 2, commutative: true},
{name: "AddMaskedInt8x64", argLength: 3, commutative: true},
{name: "CompressInt8x64", argLength: 2, commutative: false},
{name: "EqualInt8x64", argLength: 2, commutative: true},
{name: "EqualMaskedInt8x64", argLength: 3, commutative: true},
{name: "GreaterInt8x64", argLength: 2, commutative: false},
@ -868,6 +886,7 @@ func simdGenericOps() []opData {
{name: "AndNotUint16x16", argLength: 2, commutative: false},
{name: "AverageUint16x16", argLength: 2, commutative: true},
{name: "AverageMaskedUint16x16", argLength: 3, commutative: true},
{name: "CompressUint16x16", argLength: 2, commutative: false},
{name: "EqualUint16x16", argLength: 2, commutative: true},
{name: "EqualMaskedUint16x16", argLength: 3, commutative: true},
{name: "GreaterUint16x16", argLength: 2, commutative: false},
@ -893,10 +912,10 @@ func simdGenericOps() []opData {
{name: "PermuteUint16x16", argLength: 2, commutative: false},
{name: "Permute2Uint16x16", argLength: 3, commutative: false},
{name: "Permute2Int16x16", argLength: 3, commutative: false},
{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
{name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
@ -922,6 +941,7 @@ func simdGenericOps() []opData {
{name: "AddMaskedUint16x32", argLength: 3, commutative: true},
{name: "AverageUint16x32", argLength: 2, commutative: true},
{name: "AverageMaskedUint16x32", argLength: 3, commutative: true},
{name: "CompressUint16x32", argLength: 2, commutative: false},
{name: "EqualUint16x32", argLength: 2, commutative: true},
{name: "EqualMaskedUint16x32", argLength: 3, commutative: true},
{name: "GreaterUint16x32", argLength: 2, commutative: false},
@ -940,12 +960,12 @@ func simdGenericOps() []opData {
{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
{name: "NotEqualUint16x32", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
{name: "PermuteUint16x32", argLength: 2, commutative: false},
{name: "PermuteInt16x32", argLength: 2, commutative: false},
{name: "PermuteUint16x32", argLength: 2, commutative: false},
{name: "Permute2Int16x32", argLength: 3, commutative: false},
{name: "Permute2Uint16x32", argLength: 3, commutative: false},
{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
{name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
{name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
{name: "PopCountUint16x32", argLength: 1, commutative: false},
@ -974,6 +994,7 @@ func simdGenericOps() []opData {
{name: "AndNotUint16x8", argLength: 2, commutative: false},
{name: "AverageUint16x8", argLength: 2, commutative: true},
{name: "AverageMaskedUint16x8", argLength: 3, commutative: true},
{name: "CompressUint16x8", argLength: 2, commutative: false},
{name: "EqualUint16x8", argLength: 2, commutative: true},
{name: "EqualMaskedUint16x8", argLength: 3, commutative: true},
{name: "GreaterUint16x8", argLength: 2, commutative: false},
@ -1030,6 +1051,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedUint32x16", argLength: 3, commutative: true},
{name: "AndNotUint32x16", argLength: 2, commutative: false},
{name: "AndNotMaskedUint32x16", argLength: 3, commutative: false},
{name: "CompressUint32x16", argLength: 2, commutative: false},
{name: "EqualUint32x16", argLength: 2, commutative: true},
{name: "EqualMaskedUint32x16", argLength: 3, commutative: true},
{name: "GreaterUint32x16", argLength: 2, commutative: false},
@ -1049,17 +1071,17 @@ func simdGenericOps() []opData {
{name: "OrUint32x16", argLength: 2, commutative: true},
{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
{name: "PermuteInt32x16", argLength: 2, commutative: false},
{name: "PermuteUint32x16", argLength: 2, commutative: false},
{name: "PermuteFloat32x16", argLength: 2, commutative: false},
{name: "Permute2Int32x16", argLength: 3, commutative: false},
{name: "PermuteUint32x16", argLength: 2, commutative: false},
{name: "Permute2Uint32x16", argLength: 3, commutative: false},
{name: "Permute2Float32x16", argLength: 3, commutative: false},
{name: "Permute2Int32x16", argLength: 3, commutative: false},
{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
{name: "PopCountUint32x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
{name: "RotateLeftUint32x16", argLength: 2, commutative: false},
@ -1092,6 +1114,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedUint32x4", argLength: 3, commutative: true},
{name: "AndNotUint32x4", argLength: 2, commutative: false},
{name: "AndNotMaskedUint32x4", argLength: 3, commutative: false},
{name: "CompressUint32x4", argLength: 2, commutative: false},
{name: "EqualUint32x4", argLength: 2, commutative: true},
{name: "EqualMaskedUint32x4", argLength: 3, commutative: true},
{name: "GreaterUint32x4", argLength: 2, commutative: false},
@ -1114,11 +1137,11 @@ func simdGenericOps() []opData {
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
{name: "Permute2Uint32x4", argLength: 3, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2Int32x4", argLength: 3, commutative: false},
{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
{name: "PopCountUint32x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
{name: "RotateLeftUint32x4", argLength: 2, commutative: false},
@ -1151,6 +1174,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedUint32x8", argLength: 3, commutative: true},
{name: "AndNotUint32x8", argLength: 2, commutative: false},
{name: "AndNotMaskedUint32x8", argLength: 3, commutative: false},
{name: "CompressUint32x8", argLength: 2, commutative: false},
{name: "EqualUint32x8", argLength: 2, commutative: true},
{name: "EqualMaskedUint32x8", argLength: 3, commutative: true},
{name: "GreaterUint32x8", argLength: 2, commutative: false},
@ -1172,18 +1196,18 @@ func simdGenericOps() []opData {
{name: "OrMaskedUint32x8", argLength: 3, commutative: true},
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
{name: "PermuteUint32x8", argLength: 2, commutative: false},
{name: "PermuteInt32x8", argLength: 2, commutative: false},
{name: "PermuteFloat32x8", argLength: 2, commutative: false},
{name: "PermuteUint32x8", argLength: 2, commutative: false},
{name: "Permute2Uint32x8", argLength: 3, commutative: false},
{name: "Permute2Float32x8", argLength: 3, commutative: false},
{name: "Permute2Int32x8", argLength: 3, commutative: false},
{name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
{name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
{name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
{name: "PopCountUint32x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x8", argLength: 2, commutative: false},
{name: "RotateLeftUint32x8", argLength: 2, commutative: false},
@ -1216,6 +1240,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedUint64x2", argLength: 3, commutative: true},
{name: "AndNotUint64x2", argLength: 2, commutative: false},
{name: "AndNotMaskedUint64x2", argLength: 3, commutative: false},
{name: "CompressUint64x2", argLength: 2, commutative: false},
{name: "EqualUint64x2", argLength: 2, commutative: true},
{name: "EqualMaskedUint64x2", argLength: 3, commutative: true},
{name: "GreaterUint64x2", argLength: 2, commutative: false},
@ -1236,11 +1261,11 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
{name: "OrUint64x2", argLength: 2, commutative: true},
{name: "OrMaskedUint64x2", argLength: 3, commutative: true},
{name: "Permute2Float64x2", argLength: 3, commutative: false},
{name: "Permute2Uint64x2", argLength: 3, commutative: false},
{name: "Permute2Int64x2", argLength: 3, commutative: false},
{name: "Permute2Float64x2", argLength: 3, commutative: false},
{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
{name: "PopCountUint64x2", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
@ -1270,6 +1295,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedUint64x4", argLength: 3, commutative: true},
{name: "AndNotUint64x4", argLength: 2, commutative: false},
{name: "AndNotMaskedUint64x4", argLength: 3, commutative: false},
{name: "CompressUint64x4", argLength: 2, commutative: false},
{name: "EqualUint64x4", argLength: 2, commutative: true},
{name: "EqualMaskedUint64x4", argLength: 3, commutative: true},
{name: "GreaterUint64x4", argLength: 2, commutative: false},
@ -1290,18 +1316,18 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
{name: "OrUint64x4", argLength: 2, commutative: true},
{name: "OrMaskedUint64x4", argLength: 3, commutative: true},
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
{name: "PermuteUint64x4", argLength: 2, commutative: false},
{name: "PermuteInt64x4", argLength: 2, commutative: false},
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
{name: "Permute2Uint64x4", argLength: 3, commutative: false},
{name: "Permute2Int64x4", argLength: 3, commutative: false},
{name: "Permute2Uint64x4", argLength: 3, commutative: false},
{name: "Permute2Float64x4", argLength: 3, commutative: false},
{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
{name: "PopCountUint64x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
@ -1330,6 +1356,7 @@ func simdGenericOps() []opData {
{name: "AndMaskedUint64x8", argLength: 3, commutative: true},
{name: "AndNotUint64x8", argLength: 2, commutative: false},
{name: "AndNotMaskedUint64x8", argLength: 3, commutative: false},
{name: "CompressUint64x8", argLength: 2, commutative: false},
{name: "EqualUint64x8", argLength: 2, commutative: true},
{name: "EqualMaskedUint64x8", argLength: 3, commutative: true},
{name: "GreaterUint64x8", argLength: 2, commutative: false},
@ -1350,18 +1377,18 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
{name: "OrUint64x8", argLength: 2, commutative: true},
{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
{name: "PermuteUint64x8", argLength: 2, commutative: false},
{name: "PermuteInt64x8", argLength: 2, commutative: false},
{name: "PermuteUint64x8", argLength: 2, commutative: false},
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
{name: "Permute2Int64x8", argLength: 3, commutative: false},
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
{name: "Permute2Float64x8", argLength: 3, commutative: false},
{name: "Permute2Int64x8", argLength: 3, commutative: false},
{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
{name: "PopCountUint64x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
@ -1390,6 +1417,7 @@ func simdGenericOps() []opData {
{name: "AndNotUint8x16", argLength: 2, commutative: false},
{name: "AverageUint8x16", argLength: 2, commutative: true},
{name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
{name: "CompressUint8x16", argLength: 2, commutative: false},
{name: "EqualUint8x16", argLength: 2, commutative: true},
{name: "EqualMaskedUint8x16", argLength: 3, commutative: true},
{name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false},
@ -1411,12 +1439,12 @@ func simdGenericOps() []opData {
{name: "OrUint8x16", argLength: 2, commutative: true},
{name: "PermuteUint8x16", argLength: 2, commutative: false},
{name: "PermuteInt8x16", argLength: 2, commutative: false},
{name: "Permute2Uint8x16", argLength: 3, commutative: false},
{name: "Permute2Int8x16", argLength: 3, commutative: false},
{name: "Permute2Uint8x16", argLength: 3, commutative: false},
{name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
{name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
{name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
{name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
{name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
{name: "PopCountUint8x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x16", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
@ -1434,6 +1462,7 @@ func simdGenericOps() []opData {
{name: "AndNotUint8x32", argLength: 2, commutative: false},
{name: "AverageUint8x32", argLength: 2, commutative: true},
{name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
{name: "CompressUint8x32", argLength: 2, commutative: false},
{name: "EqualUint8x32", argLength: 2, commutative: true},
{name: "EqualMaskedUint8x32", argLength: 3, commutative: true},
{name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false},
@ -1457,10 +1486,10 @@ func simdGenericOps() []opData {
{name: "PermuteInt8x32", argLength: 2, commutative: false},
{name: "Permute2Int8x32", argLength: 3, commutative: false},
{name: "Permute2Uint8x32", argLength: 3, commutative: false},
{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
{name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
{name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
{name: "PopCountUint8x32", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
@ -1476,6 +1505,7 @@ func simdGenericOps() []opData {
{name: "AddMaskedUint8x64", argLength: 3, commutative: true},
{name: "AverageUint8x64", argLength: 2, commutative: true},
{name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
{name: "CompressUint8x64", argLength: 2, commutative: false},
{name: "EqualUint8x64", argLength: 2, commutative: true},
{name: "EqualMaskedUint8x64", argLength: 3, commutative: true},
{name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false},
@ -1494,14 +1524,14 @@ func simdGenericOps() []opData {
{name: "MinMaskedUint8x64", argLength: 3, commutative: true},
{name: "NotEqualUint8x64", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true},
{name: "PermuteUint8x64", argLength: 2, commutative: false},
{name: "PermuteInt8x64", argLength: 2, commutative: false},
{name: "Permute2Int8x64", argLength: 3, commutative: false},
{name: "PermuteUint8x64", argLength: 2, commutative: false},
{name: "Permute2Uint8x64", argLength: 3, commutative: false},
{name: "Permute2Int8x64", argLength: 3, commutative: false},
{name: "Permute2MaskedUint8x64", argLength: 4, commutative: false},
{name: "Permute2MaskedInt8x64", argLength: 4, commutative: false},
{name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
{name: "PermuteMaskedUint8x64", argLength: 3, commutative: false},
{name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
{name: "PopCountUint8x64", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x64", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},

File diff suppressed because it is too large Load diff

View file

@ -1185,6 +1185,66 @@ func rewriteValueAMD64(v *Value) bool {
case OpCom8:
v.Op = OpAMD64NOTL
return true
case OpCompressFloat32x16:
return rewriteValueAMD64_OpCompressFloat32x16(v)
case OpCompressFloat32x4:
return rewriteValueAMD64_OpCompressFloat32x4(v)
case OpCompressFloat32x8:
return rewriteValueAMD64_OpCompressFloat32x8(v)
case OpCompressFloat64x2:
return rewriteValueAMD64_OpCompressFloat64x2(v)
case OpCompressFloat64x4:
return rewriteValueAMD64_OpCompressFloat64x4(v)
case OpCompressFloat64x8:
return rewriteValueAMD64_OpCompressFloat64x8(v)
case OpCompressInt16x16:
return rewriteValueAMD64_OpCompressInt16x16(v)
case OpCompressInt16x32:
return rewriteValueAMD64_OpCompressInt16x32(v)
case OpCompressInt16x8:
return rewriteValueAMD64_OpCompressInt16x8(v)
case OpCompressInt32x16:
return rewriteValueAMD64_OpCompressInt32x16(v)
case OpCompressInt32x4:
return rewriteValueAMD64_OpCompressInt32x4(v)
case OpCompressInt32x8:
return rewriteValueAMD64_OpCompressInt32x8(v)
case OpCompressInt64x2:
return rewriteValueAMD64_OpCompressInt64x2(v)
case OpCompressInt64x4:
return rewriteValueAMD64_OpCompressInt64x4(v)
case OpCompressInt64x8:
return rewriteValueAMD64_OpCompressInt64x8(v)
case OpCompressInt8x16:
return rewriteValueAMD64_OpCompressInt8x16(v)
case OpCompressInt8x32:
return rewriteValueAMD64_OpCompressInt8x32(v)
case OpCompressInt8x64:
return rewriteValueAMD64_OpCompressInt8x64(v)
case OpCompressUint16x16:
return rewriteValueAMD64_OpCompressUint16x16(v)
case OpCompressUint16x32:
return rewriteValueAMD64_OpCompressUint16x32(v)
case OpCompressUint16x8:
return rewriteValueAMD64_OpCompressUint16x8(v)
case OpCompressUint32x16:
return rewriteValueAMD64_OpCompressUint32x16(v)
case OpCompressUint32x4:
return rewriteValueAMD64_OpCompressUint32x4(v)
case OpCompressUint32x8:
return rewriteValueAMD64_OpCompressUint32x8(v)
case OpCompressUint64x2:
return rewriteValueAMD64_OpCompressUint64x2(v)
case OpCompressUint64x4:
return rewriteValueAMD64_OpCompressUint64x4(v)
case OpCompressUint64x8:
return rewriteValueAMD64_OpCompressUint64x8(v)
case OpCompressUint8x16:
return rewriteValueAMD64_OpCompressUint8x16(v)
case OpCompressUint8x32:
return rewriteValueAMD64_OpCompressUint8x32(v)
case OpCompressUint8x64:
return rewriteValueAMD64_OpCompressUint8x64(v)
case OpCondSelect:
return rewriteValueAMD64_OpCondSelect(v)
case OpConst16:
@ -30451,6 +30511,486 @@ func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x8(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpCompressFloat32x16 lowers the generic CompressFloat32x16
// op to the machine op VCOMPRESSPSMasked512, converting the vector-shaped mask
// arg to a K-register mask via VPMOVVec32x16ToM.
// NOTE(review): this file is generated from _gen rewrite rules; edit the rules,
// not this function.
func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressFloat32x16 x mask)
	// result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VCOMPRESSPSMasked512)
		// Wrap the mask in a vector-to-mask-register conversion.
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressFloat32x4 lowers the generic CompressFloat32x4
// op to VCOMPRESSPSMasked128, converting the vector mask to a K register
// with VPMOVVec32x4ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressFloat32x4 x mask)
	// result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VCOMPRESSPSMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressFloat32x8 lowers the generic CompressFloat32x8
// op to VCOMPRESSPSMasked256, converting the vector mask to a K register
// with VPMOVVec32x8ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressFloat32x8 x mask)
	// result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VCOMPRESSPSMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressFloat64x2 lowers the generic CompressFloat64x2
// op to VCOMPRESSPDMasked128, converting the vector mask to a K register
// with VPMOVVec64x2ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressFloat64x2 x mask)
	// result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VCOMPRESSPDMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressFloat64x4 lowers the generic CompressFloat64x4
// op to VCOMPRESSPDMasked256, converting the vector mask to a K register
// with VPMOVVec64x4ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressFloat64x4 x mask)
	// result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VCOMPRESSPDMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressFloat64x8 lowers the generic CompressFloat64x8
// op to VCOMPRESSPDMasked512, converting the vector mask to a K register
// with VPMOVVec64x8ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressFloat64x8 x mask)
	// result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VCOMPRESSPDMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt16x16 lowers the generic CompressInt16x16
// op to VPCOMPRESSWMasked256, converting the vector mask to a K register
// with VPMOVVec16x16ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt16x16 x mask)
	// result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSWMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt16x32 lowers the generic CompressInt16x32
// op to VPCOMPRESSWMasked512, converting the vector mask to a K register
// with VPMOVVec16x32ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt16x32 x mask)
	// result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSWMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt16x8 lowers the generic CompressInt16x8
// op to VPCOMPRESSWMasked128, converting the vector mask to a K register
// with VPMOVVec16x8ToM. Generated code; always rewrites (returns true).
func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt16x8 x mask)
	// result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSWMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt32x16 rewrites (CompressInt32x16 x mask) into
// (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt32x16 x mask)
	// result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSDMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt32x4 rewrites (CompressInt32x4 x mask) into
// (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt32x4 x mask)
	// result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSDMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt32x8 rewrites (CompressInt32x8 x mask) into
// (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt32x8 x mask)
	// result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSDMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt64x2 rewrites (CompressInt64x2 x mask) into
// (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt64x2 x mask)
	// result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSQMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt64x4 rewrites (CompressInt64x4 x mask) into
// (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt64x4 x mask)
	// result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSQMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt64x8 rewrites (CompressInt64x8 x mask) into
// (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt64x8 x mask)
	// result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSQMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt8x16 rewrites (CompressInt8x16 x mask) into
// (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt8x16 x mask)
	// result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSBMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt8x32 rewrites (CompressInt8x32 x mask) into
// (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt8x32 x mask)
	// result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSBMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressInt8x64 rewrites (CompressInt8x64 x mask) into
// (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressInt8x64 x mask)
	// result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSBMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint16x16 rewrites (CompressUint16x16 x mask) into
// (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint16x16 x mask)
	// result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSWMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint16x32 rewrites (CompressUint16x32 x mask) into
// (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint16x32 x mask)
	// result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSWMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint16x8 rewrites (CompressUint16x8 x mask) into
// (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint16x8 x mask)
	// result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSWMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint32x16 rewrites (CompressUint32x16 x mask) into
// (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint32x16 x mask)
	// result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSDMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint32x4 rewrites (CompressUint32x4 x mask) into
// (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint32x4 x mask)
	// result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSDMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint32x8 rewrites (CompressUint32x8 x mask) into
// (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint32x8 x mask)
	// result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSDMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint64x2 rewrites (CompressUint64x2 x mask) into
// (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint64x2 x mask)
	// result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSQMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint64x4 rewrites (CompressUint64x4 x mask) into
// (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint64x4 x mask)
	// result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSQMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint64x8 rewrites (CompressUint64x8 x mask) into
// (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint64x8 x mask)
	// result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSQMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint8x16 rewrites (CompressUint8x16 x mask) into
// (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint8x16 x mask)
	// result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSBMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint8x32 rewrites (CompressUint8x32 x mask) into
// (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint8x32 x mask)
	// result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSBMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpCompressUint8x64 rewrites (CompressUint8x64 x mask) into
// (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)); the rule is unconditional, so it always returns true.
func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (CompressUint8x64 x mask)
	// result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPCOMPRESSBMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
func rewriteValueAMD64_OpCondSelect(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]

View file

@ -215,6 +215,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Compress", opLen2(ssa.OpCompressFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Compress", opLen2(ssa.OpCompressFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Compress", opLen2(ssa.OpCompressFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Compress", opLen2(ssa.OpCompressInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Compress", opLen2(ssa.OpCompressInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Compress", opLen2(ssa.OpCompressInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Compress", opLen2(ssa.OpCompressInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Compress", opLen2(ssa.OpCompressInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.Compress", opLen2(ssa.OpCompressInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Compress", opLen2(ssa.OpCompressInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Compress", opLen2(ssa.OpCompressInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.Compress", opLen2(ssa.OpCompressInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Compress", opLen2(ssa.OpCompressInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Compress", opLen2(ssa.OpCompressInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.Compress", opLen2(ssa.OpCompressInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Compress", opLen2(ssa.OpCompressUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Compress", opLen2(ssa.OpCompressUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Compress", opLen2(ssa.OpCompressUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Compress", opLen2(ssa.OpCompressUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Compress", opLen2(ssa.OpCompressUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Compress", opLen2(ssa.OpCompressUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Compress", opLen2(ssa.OpCompressUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Compress", opLen2(ssa.OpCompressUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.Compress", opLen2(ssa.OpCompressUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)

View file

@ -1084,6 +1084,188 @@ func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
/* Compress */

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512F
func (x Float32x4) Compress(mask Mask32x4) Float32x4

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512F
func (x Float32x8) Compress(mask Mask32x8) Float32x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512F
func (x Float32x16) Compress(mask Mask32x16) Float32x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512F
func (x Float64x2) Compress(mask Mask64x2) Float64x2

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512F
func (x Float64x4) Compress(mask Mask64x4) Float64x4

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512F
func (x Float64x8) Compress(mask Mask64x8) Float64x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x16) Compress(mask Mask8x16) Int8x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x32) Compress(mask Mask8x32) Int8x32

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x64) Compress(mask Mask8x64) Int8x64

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x8) Compress(mask Mask16x8) Int16x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x16) Compress(mask Mask16x16) Int16x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x32) Compress(mask Mask16x32) Int16x32

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Int32x4) Compress(mask Mask32x4) Int32x4

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Int32x8) Compress(mask Mask32x8) Int32x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Int32x16) Compress(mask Mask32x16) Int32x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Int64x2) Compress(mask Mask64x2) Int64x2

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Int64x4) Compress(mask Mask64x4) Int64x4

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Int64x8) Compress(mask Mask64x8) Int64x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x16) Compress(mask Mask8x16) Uint8x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x32) Compress(mask Mask8x32) Uint8x32

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x64) Compress(mask Mask8x64) Uint8x64

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x8) Compress(mask Mask16x8) Uint16x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x16) Compress(mask Mask16x16) Uint16x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x32) Compress(mask Mask16x32) Uint16x32

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Uint32x4) Compress(mask Mask32x4) Uint32x4

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Uint32x8) Compress(mask Mask32x8) Uint32x8

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Uint32x16) Compress(mask Mask32x16) Uint32x16

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Uint64x2) Compress(mask Mask64x2) Uint64x2

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Uint64x4) Compress(mask Mask64x4) Uint64x4

// Compress copies the elements of x selected by mask into the low-indexed
// positions of the result, preserving their order; remaining elements are zeroed.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
/* DiffWithCeilWithPrecision */
// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.

View file

@ -186,6 +186,16 @@ func TestPermute2(t *testing.T) {
}
}
// TestCompress checks masked compression on Int32x4: elements whose mask lane
// is set (-1) are packed to the front of the result in order, and the
// remaining lanes are zeroed.
func TestCompress(t *testing.T) {
	if !simd.HasAVX512() {
		// t.Skip calls SkipNow, which stops the test via runtime.Goexit,
		// so no explicit return is needed after it.
		t.Skip("Test requires HasAVX512, not available on this hardware")
	}
	testInt32x4Mask32x4Int32x4(t, []int32{1, 2, 3, 4},
		[]int32{0, -1, 0, -1},
		[]int32{2, 4, 0, 0}, "Compress")
}
// checkInt8Slices ensures that b and a are equal, to the end of b.
// also serves to use the slices, to prevent accidental optimization.
func checkInt8Slices(t *testing.T, a, b []int8) {

View file

@ -117,6 +117,27 @@ func testFloat32x4Compare(t *testing.T, v0 []float32, v1 []float32, want []int32
}
}
// testFloat32x4Mask32x4Float32x4 applies the Float32x4 method named by which
// to v0, with v1 reinterpreted as a Mask32x4, and compares the stored result
// element-wise against want.
func testFloat32x4Mask32x4Float32x4(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) {
	t.Helper()
	var gotv simd.Float32x4
	got := make([]float32, len(want))
	vec0 := simd.LoadFloat32x4Slice(v0)
	vec1 := simd.LoadInt32x4Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask32x4())
	default:
		t.Errorf("Unknown method: Float32x4.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testFloat32x4MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
@ -369,6 +390,27 @@ func testFloat32x8Compare(t *testing.T, v0 []float32, v1 []float32, want []int32
}
}
// testFloat32x8Mask32x8Float32x8 applies the Float32x8 method named by which
// to v0, with v1 reinterpreted as a Mask32x8, and compares the stored result
// element-wise against want.
func testFloat32x8Mask32x8Float32x8(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) {
	t.Helper()
	var gotv simd.Float32x8
	got := make([]float32, len(want))
	vec0 := simd.LoadFloat32x8Slice(v0)
	vec1 := simd.LoadInt32x8Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask32x8())
	default:
		t.Errorf("Unknown method: Float32x8.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testFloat32x8MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
@ -613,6 +655,27 @@ func testFloat32x16Compare(t *testing.T, v0 []float32, v1 []float32, want []int3
}
}
// testFloat32x16Mask32x16Float32x16 applies the Float32x16 method named by which
// to v0, with v1 reinterpreted as a Mask32x16, and compares the stored result
// element-wise against want.
func testFloat32x16Mask32x16Float32x16(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) {
	t.Helper()
	var gotv simd.Float32x16
	got := make([]float32, len(want))
	vec0 := simd.LoadFloat32x16Slice(v0)
	vec1 := simd.LoadInt32x16Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask32x16())
	default:
		t.Errorf("Unknown method: Float32x16.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testFloat32x16MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
@ -857,6 +920,27 @@ func testFloat64x2Compare(t *testing.T, v0 []float64, v1 []float64, want []int64
}
}
// testFloat64x2Mask64x2Float64x2 applies the Float64x2 method named by which
// to v0, with v1 reinterpreted as a Mask64x2, and compares the stored result
// element-wise against want.
func testFloat64x2Mask64x2Float64x2(t *testing.T, v0 []float64, v1 []int64, want []float64, which string) {
	t.Helper()
	var gotv simd.Float64x2
	got := make([]float64, len(want))
	vec0 := simd.LoadFloat64x2Slice(v0)
	vec1 := simd.LoadInt64x2Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask64x2())
	default:
		t.Errorf("Unknown method: Float64x2.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testFloat64x2MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x2
@ -1107,6 +1191,27 @@ func testFloat64x4Compare(t *testing.T, v0 []float64, v1 []float64, want []int64
}
}
// testFloat64x4Mask64x4Float64x4 applies the Float64x4 method named by which
// to v0, with v1 reinterpreted as a Mask64x4, and compares the stored result
// element-wise against want.
func testFloat64x4Mask64x4Float64x4(t *testing.T, v0 []float64, v1 []int64, want []float64, which string) {
	t.Helper()
	var gotv simd.Float64x4
	got := make([]float64, len(want))
	vec0 := simd.LoadFloat64x4Slice(v0)
	vec1 := simd.LoadInt64x4Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask64x4())
	default:
		t.Errorf("Unknown method: Float64x4.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testFloat64x4MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x4
@ -1351,6 +1456,27 @@ func testFloat64x8Compare(t *testing.T, v0 []float64, v1 []float64, want []int64
}
}
// testFloat64x8Mask64x8Float64x8 applies the Float64x8 method named by which
// to v0, with v1 reinterpreted as a Mask64x8, and compares the stored result
// element-wise against want.
func testFloat64x8Mask64x8Float64x8(t *testing.T, v0 []float64, v1 []int64, want []float64, which string) {
	t.Helper()
	var gotv simd.Float64x8
	got := make([]float64, len(want))
	vec0 := simd.LoadFloat64x8Slice(v0)
	vec1 := simd.LoadInt64x8Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask64x8())
	default:
		t.Errorf("Unknown method: Float64x8.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testFloat64x8MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x8
@ -1591,6 +1717,27 @@ func testInt8x16Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which s
}
}
// testInt8x16Mask8x16Int8x16 applies the Int8x16 method named by which
// to v0, with v1 reinterpreted as a Mask8x16, and compares the stored result
// element-wise against want.
func testInt8x16Mask8x16Int8x16(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
	t.Helper()
	var gotv simd.Int8x16
	got := make([]int8, len(want))
	vec0 := simd.LoadInt8x16Slice(v0)
	vec1 := simd.LoadInt8x16Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask8x16())
	default:
		t.Errorf("Unknown method: Int8x16.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt8x16MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
t.Helper()
var gotv simd.Int8x16
@ -1772,6 +1919,27 @@ func testInt8x32Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which s
}
}
// testInt8x32Mask8x32Int8x32 applies the Int8x32 method named by which
// to v0, with v1 reinterpreted as a Mask8x32, and compares the stored result
// element-wise against want.
func testInt8x32Mask8x32Int8x32(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
	t.Helper()
	var gotv simd.Int8x32
	got := make([]int8, len(want))
	vec0 := simd.LoadInt8x32Slice(v0)
	vec1 := simd.LoadInt8x32Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask8x32())
	default:
		t.Errorf("Unknown method: Int8x32.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt8x32MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
t.Helper()
var gotv simd.Int8x32
@ -1943,6 +2111,27 @@ func testInt8x64Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which s
}
}
// testInt8x64Mask8x64Int8x64 applies the Int8x64 method named by which
// to v0, with v1 reinterpreted as a Mask8x64, and compares the stored result
// element-wise against want.
func testInt8x64Mask8x64Int8x64(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
	t.Helper()
	var gotv simd.Int8x64
	got := make([]int8, len(want))
	vec0 := simd.LoadInt8x64Slice(v0)
	vec1 := simd.LoadInt8x64Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask8x64())
	default:
		t.Errorf("Unknown method: Int8x64.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt8x64MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
t.Helper()
var gotv simd.Int8x64
@ -2191,6 +2380,27 @@ func testInt16x8Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
}
}
// testInt16x8Mask16x8Int16x8 applies the Int16x8 method named by which
// to v0, with v1 reinterpreted as a Mask16x8, and compares the stored result
// element-wise against want.
func testInt16x8Mask16x8Int16x8(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
	t.Helper()
	var gotv simd.Int16x8
	got := make([]int16, len(want))
	vec0 := simd.LoadInt16x8Slice(v0)
	vec1 := simd.LoadInt16x8Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask16x8())
	default:
		t.Errorf("Unknown method: Int16x8.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) {
t.Helper()
var gotv simd.Int16x8
@ -2488,6 +2698,27 @@ func testInt16x16Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, whi
}
}
// testInt16x16Mask16x16Int16x16 applies the Int16x16 method named by which
// to v0, with v1 reinterpreted as a Mask16x16, and compares the stored result
// element-wise against want.
func testInt16x16Mask16x16Int16x16(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
	t.Helper()
	var gotv simd.Int16x16
	got := make([]int16, len(want))
	vec0 := simd.LoadInt16x16Slice(v0)
	vec1 := simd.LoadInt16x16Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask16x16())
	default:
		t.Errorf("Unknown method: Int16x16.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt16x16MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) {
t.Helper()
var gotv simd.Int16x16
@ -2767,6 +2998,27 @@ func testInt16x32Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, whi
}
}
// testInt16x32Mask16x32Int16x32 applies the Int16x32 method named by which
// to v0, with v1 reinterpreted as a Mask16x32, and compares the stored result
// element-wise against want.
func testInt16x32Mask16x32Int16x32(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
	t.Helper()
	var gotv simd.Int16x32
	got := make([]int16, len(want))
	vec0 := simd.LoadInt16x32Slice(v0)
	vec1 := simd.LoadInt16x32Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask16x32())
	default:
		t.Errorf("Unknown method: Int16x32.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt16x32MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) {
t.Helper()
var gotv simd.Int16x32
@ -3091,6 +3343,27 @@ func testInt32x4Int16x8Int16x8Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int
}
}
// testInt32x4Mask32x4Int32x4 applies the Int32x4 method named by which
// to v0, with v1 reinterpreted as a Mask32x4, and compares the stored result
// element-wise against want.
func testInt32x4Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
	t.Helper()
	var gotv simd.Int32x4
	got := make([]int32, len(want))
	vec0 := simd.LoadInt32x4Slice(v0)
	vec1 := simd.LoadInt32x4Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask32x4())
	default:
		t.Errorf("Unknown method: Int32x4.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt32x4MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
@ -3464,6 +3737,27 @@ func testInt32x8Int16x16Int16x16Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []i
}
}
// testInt32x8Mask32x8Int32x8 applies the Int32x8 method named by which
// to v0, with v1 reinterpreted as a Mask32x8, and compares the stored result
// element-wise against want.
func testInt32x8Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
	t.Helper()
	var gotv simd.Int32x8
	got := make([]int32, len(want))
	vec0 := simd.LoadInt32x8Slice(v0)
	vec1 := simd.LoadInt32x8Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask32x8())
	default:
		t.Errorf("Unknown method: Int32x8.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt32x8MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
@ -3810,6 +4104,27 @@ func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1
}
}
// testInt32x16Mask32x16Int32x16 applies the Int32x16 method named by which
// to v0, with v1 reinterpreted as a Mask32x16, and compares the stored result
// element-wise against want.
func testInt32x16Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
	t.Helper()
	var gotv simd.Int32x16
	got := make([]int32, len(want))
	vec0 := simd.LoadInt32x16Slice(v0)
	vec1 := simd.LoadInt32x16Slice(v1)
	switch which {
	case "Compress":
		gotv = vec0.Compress(vec1.AsMask32x16())
	default:
		t.Errorf("Unknown method: Int32x16.%s", which)
	}
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
@ -4111,6 +4426,27 @@ func testInt64x2Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic
}
}
// testInt64x2Mask64x2Int64x2 loads v0 and v1 into Int64x2 vectors, invokes
// the method named by which (with v1 reinterpreted as a mask), and reports
// every element of the stored result that differs from want.
func testInt64x2Mask64x2Int64x2(t *testing.T, v0 []int64, v1 []int64, want []int64, which string) {
	t.Helper()
	vecA := simd.LoadInt64x2Slice(v0)
	vecB := simd.LoadInt64x2Slice(v1)
	var res simd.Int64x2
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask64x2())
	default:
		t.Errorf("Unknown method: Int64x2.%s", which)
	}
	out := make([]int64, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testInt64x2MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x2
@ -4363,6 +4699,27 @@ func testInt64x4Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic
}
}
// testInt64x4Mask64x4Int64x4 loads v0 and v1 into Int64x4 vectors, invokes
// the method named by which (with v1 reinterpreted as a mask), and reports
// every element of the stored result that differs from want.
func testInt64x4Mask64x4Int64x4(t *testing.T, v0 []int64, v1 []int64, want []int64, which string) {
	t.Helper()
	vecA := simd.LoadInt64x4Slice(v0)
	vecB := simd.LoadInt64x4Slice(v1)
	var res simd.Int64x4
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask64x4())
	default:
		t.Errorf("Unknown method: Int64x4.%s", which)
	}
	out := make([]int64, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testInt64x4MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x4
@ -4615,6 +4972,27 @@ func testInt64x8Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic
}
}
// testInt64x8Mask64x8Int64x8 loads v0 and v1 into Int64x8 vectors, invokes
// the method named by which (with v1 reinterpreted as a mask), and reports
// every element of the stored result that differs from want.
func testInt64x8Mask64x8Int64x8(t *testing.T, v0 []int64, v1 []int64, want []int64, which string) {
	t.Helper()
	vecA := simd.LoadInt64x8Slice(v0)
	vecB := simd.LoadInt64x8Slice(v1)
	var res simd.Int64x8
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask64x8())
	default:
		t.Errorf("Unknown method: Int64x8.%s", which)
	}
	out := make([]int64, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testInt64x8MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x8
@ -4894,6 +5272,27 @@ func testUint8x16Int8x16Mask16x8Int16x8(t *testing.T, v0 []uint8, v1 []int8, v2
}
}
// testUint8x16Mask8x16Uint8x16 loads v0 into a Uint8x16 and v1 into an
// Int8x16, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint8x16Mask8x16Uint8x16(t *testing.T, v0 []uint8, v1 []int8, want []uint8, which string) {
	t.Helper()
	vecA := simd.LoadUint8x16Slice(v0)
	vecB := simd.LoadInt8x16Slice(v1)
	var res simd.Uint8x16
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask8x16())
	default:
		t.Errorf("Unknown method: Uint8x16.%s", which)
	}
	out := make([]uint8, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint8x16MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, want []int8, which string) {
t.Helper()
var gotv simd.Int8x16
@ -5120,6 +5519,27 @@ func testUint8x32Int8x32Mask16x16Int16x16(t *testing.T, v0 []uint8, v1 []int8, v
}
}
// testUint8x32Mask8x32Uint8x32 loads v0 into a Uint8x32 and v1 into an
// Int8x32, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint8x32Mask8x32Uint8x32(t *testing.T, v0 []uint8, v1 []int8, want []uint8, which string) {
	t.Helper()
	vecA := simd.LoadUint8x32Slice(v0)
	vecB := simd.LoadInt8x32Slice(v1)
	var res simd.Uint8x32
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask8x32())
	default:
		t.Errorf("Unknown method: Uint8x32.%s", which)
	}
	out := make([]uint8, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint8x32MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, want []int8, which string) {
t.Helper()
var gotv simd.Int8x32
@ -5338,6 +5758,27 @@ func testUint8x64Int8x64Mask16x32Int16x32(t *testing.T, v0 []uint8, v1 []int8, v
}
}
// testUint8x64Mask8x64Uint8x64 loads v0 into a Uint8x64 and v1 into an
// Int8x64, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint8x64Mask8x64Uint8x64(t *testing.T, v0 []uint8, v1 []int8, want []uint8, which string) {
	t.Helper()
	vecA := simd.LoadUint8x64Slice(v0)
	vecB := simd.LoadInt8x64Slice(v1)
	var res simd.Uint8x64
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask8x64())
	default:
		t.Errorf("Unknown method: Uint8x64.%s", which)
	}
	out := make([]uint8, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint8x64MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, want []int8, which string) {
t.Helper()
var gotv simd.Int8x64
@ -5533,6 +5974,27 @@ func testUint16x8Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, w
}
}
// testUint16x8Mask16x8Uint16x8 loads v0 into a Uint16x8 and v1 into an
// Int16x8, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint16x8Mask16x8Uint16x8(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) {
	t.Helper()
	vecA := simd.LoadUint16x8Slice(v0)
	vecB := simd.LoadInt16x8Slice(v1)
	var res simd.Uint16x8
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask16x8())
	default:
		t.Errorf("Unknown method: Uint16x8.%s", which)
	}
	out := make([]uint16, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) {
t.Helper()
var gotv simd.Int16x8
@ -5777,6 +6239,27 @@ func testUint16x16Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16,
}
}
// testUint16x16Mask16x16Uint16x16 loads v0 into a Uint16x16 and v1 into an
// Int16x16, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint16x16Mask16x16Uint16x16(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) {
	t.Helper()
	vecA := simd.LoadUint16x16Slice(v0)
	vecB := simd.LoadInt16x16Slice(v1)
	var res simd.Uint16x16
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask16x16())
	default:
		t.Errorf("Unknown method: Uint16x16.%s", which)
	}
	out := make([]uint16, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) {
t.Helper()
var gotv simd.Int16x16
@ -6009,6 +6492,27 @@ func testUint16x32Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16,
}
}
// testUint16x32Mask16x32Uint16x32 loads v0 into a Uint16x32 and v1 into an
// Int16x32, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint16x32Mask16x32Uint16x32(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) {
	t.Helper()
	vecA := simd.LoadUint16x32Slice(v0)
	vecB := simd.LoadInt16x32Slice(v1)
	var res simd.Uint16x32
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask16x32())
	default:
		t.Errorf("Unknown method: Uint16x32.%s", which)
	}
	out := make([]uint16, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) {
t.Helper()
var gotv simd.Int16x32
@ -6274,6 +6778,27 @@ func testUint32x4Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, w
}
}
// testUint32x4Mask32x4Uint32x4 loads v0 into a Uint32x4 and v1 into an
// Int32x4, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint32x4Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) {
	t.Helper()
	vecA := simd.LoadUint32x4Slice(v0)
	vecB := simd.LoadInt32x4Slice(v1)
	var res simd.Uint32x4
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask32x4())
	default:
		t.Errorf("Unknown method: Uint32x4.%s", which)
	}
	out := make([]uint32, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
@ -6588,6 +7113,27 @@ func testUint32x8Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, w
}
}
// testUint32x8Mask32x8Uint32x8 loads v0 into a Uint32x8 and v1 into an
// Int32x8, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint32x8Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) {
	t.Helper()
	vecA := simd.LoadUint32x8Slice(v0)
	vecB := simd.LoadInt32x8Slice(v1)
	var res simd.Uint32x8
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask32x8())
	default:
		t.Errorf("Unknown method: Uint32x8.%s", which)
	}
	out := make([]uint32, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
@ -6877,6 +7423,27 @@ func testUint32x16Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32,
}
}
// testUint32x16Mask32x16Uint32x16 loads v0 into a Uint32x16 and v1 into an
// Int32x16, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint32x16Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) {
	t.Helper()
	vecA := simd.LoadUint32x16Slice(v0)
	vecB := simd.LoadInt32x16Slice(v1)
	var res simd.Uint32x16
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask32x16())
	default:
		t.Errorf("Unknown method: Uint32x16.%s", which)
	}
	out := make([]uint32, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
@ -7170,6 +7737,27 @@ func testUint64x2Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, w
}
}
// testUint64x2Mask64x2Uint64x2 loads v0 into a Uint64x2 and v1 into an
// Int64x2, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint64x2Mask64x2Uint64x2(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
	t.Helper()
	vecA := simd.LoadUint64x2Slice(v0)
	vecB := simd.LoadInt64x2Slice(v1)
	var res simd.Uint64x2
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask64x2())
	default:
		t.Errorf("Unknown method: Uint64x2.%s", which)
	}
	out := make([]uint64, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x2
@ -7414,6 +8002,27 @@ func testUint64x4Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, w
}
}
// testUint64x4Mask64x4Uint64x4 loads v0 into a Uint64x4 and v1 into an
// Int64x4, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint64x4Mask64x4Uint64x4(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
	t.Helper()
	vecA := simd.LoadUint64x4Slice(v0)
	vecB := simd.LoadInt64x4Slice(v1)
	var res simd.Uint64x4
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask64x4())
	default:
		t.Errorf("Unknown method: Uint64x4.%s", which)
	}
	out := make([]uint64, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x4
@ -7658,6 +8267,27 @@ func testUint64x8Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, w
}
}
// testUint64x8Mask64x8Uint64x8 loads v0 into a Uint64x8 and v1 into an
// Int64x8, invokes the method named by which (with v1 reinterpreted as a
// mask), and reports every element of the stored result that differs from want.
func testUint64x8Mask64x8Uint64x8(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
	t.Helper()
	vecA := simd.LoadUint64x8Slice(v0)
	vecB := simd.LoadInt64x8Slice(v1)
	var res simd.Uint64x8
	switch which {
	case "Compress":
		res = vecA.Compress(vecB.AsMask64x8())
	default:
		t.Errorf("Unknown method: Uint64x8.%s", which)
	}
	out := make([]uint64, len(want))
	res.StoreSlice(out)
	for i, w := range want {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) {
t.Helper()
var gotv simd.Int64x8