From 6d1068014168da26b2f5bcaab15a137aee4d7d05 Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Mon, 14 Jul 2025 20:29:46 +0000
Subject: [PATCH] [dev.simd] cmd/compile, simd: add Compress

This CL is generated by CL 687975.

Change-Id: I21707d108773cc6d8e6f07aaed60e756faa1e6cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/687995
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---
 src/cmd/compile/internal/amd64/simdssa.go     |  36 +
 .../compile/internal/ssa/_gen/simdAMD64.rules |  30 +
 .../compile/internal/ssa/_gen/simdAMD64ops.go |  36 +-
 .../internal/ssa/_gen/simdgenericOps.go       |  94 +-
 src/cmd/compile/internal/ssa/opGen.go         | 810 ++++++++++++++----
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 540 ++++++++++++
 .../compile/internal/ssagen/simdintrinsics.go |  30 +
 src/simd/ops_amd64.go                         | 182 ++++
 src/simd/simd_test.go                         |  10 +
 src/simd/simd_wrapped_test.go                 | 630 ++++++++++++++
 10 files changed, 2177 insertions(+), 221 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 1a7e3be9e50..67179ef12d6 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -600,6 +600,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VRSQRT14PDMasked128,
 		ssa.OpAMD64VRSQRT14PDMasked256,
 		ssa.OpAMD64VRSQRT14PDMasked512,
+		ssa.OpAMD64VCOMPRESSPSMasked128,
+		ssa.OpAMD64VCOMPRESSPSMasked256,
+		ssa.OpAMD64VCOMPRESSPSMasked512,
+		ssa.OpAMD64VCOMPRESSPDMasked128,
+		ssa.OpAMD64VCOMPRESSPDMasked256,
+		ssa.OpAMD64VCOMPRESSPDMasked512,
+		ssa.OpAMD64VPCOMPRESSBMasked128,
+		ssa.OpAMD64VPCOMPRESSBMasked256,
+		ssa.OpAMD64VPCOMPRESSBMasked512,
+		ssa.OpAMD64VPCOMPRESSWMasked128,
+		ssa.OpAMD64VPCOMPRESSWMasked256,
+		ssa.OpAMD64VPCOMPRESSWMasked512,
+		ssa.OpAMD64VPCOMPRESSDMasked128,
+		ssa.OpAMD64VPCOMPRESSDMasked256,
+		ssa.OpAMD64VPCOMPRESSDMasked512,
+		ssa.OpAMD64VPCOMPRESSQMasked128,
+		ssa.OpAMD64VPCOMPRESSQMasked256,
+		ssa.OpAMD64VPCOMPRESSQMasked512,
 		ssa.OpAMD64VPOPCNTBMasked128,
 		ssa.OpAMD64VPOPCNTBMasked256,
 		ssa.OpAMD64VPOPCNTBMasked512,
@@ -1078,6 +1096,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VRNDSCALEPDMasked128,
 		ssa.OpAMD64VRNDSCALEPDMasked256,
 		ssa.OpAMD64VRNDSCALEPDMasked512,
+		ssa.OpAMD64VCOMPRESSPSMasked128,
+		ssa.OpAMD64VCOMPRESSPSMasked256,
+		ssa.OpAMD64VCOMPRESSPSMasked512,
+		ssa.OpAMD64VCOMPRESSPDMasked128,
+		ssa.OpAMD64VCOMPRESSPDMasked256,
+		ssa.OpAMD64VCOMPRESSPDMasked512,
+		ssa.OpAMD64VPCOMPRESSBMasked128,
+		ssa.OpAMD64VPCOMPRESSBMasked256,
+		ssa.OpAMD64VPCOMPRESSBMasked512,
+		ssa.OpAMD64VPCOMPRESSWMasked128,
+		ssa.OpAMD64VPCOMPRESSWMasked256,
+		ssa.OpAMD64VPCOMPRESSWMasked512,
+		ssa.OpAMD64VPCOMPRESSDMasked128,
+		ssa.OpAMD64VPCOMPRESSDMasked256,
+		ssa.OpAMD64VPCOMPRESSDMasked512,
+		ssa.OpAMD64VPCOMPRESSQMasked128,
+		ssa.OpAMD64VPCOMPRESSQMasked256,
+		ssa.OpAMD64VPCOMPRESSQMasked512,
 		ssa.OpAMD64VREDUCEPSMasked128,
 		ssa.OpAMD64VREDUCEPSMasked256,
 		ssa.OpAMD64VREDUCEPSMasked512,
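All of the opcodes registered above are the masked forms of the AVX-512 compress instructions. For reference, the lane semantics those instructions implement can be written as a short scalar Go loop; this is a sketch of the architectural definition for one element type, not code from this CL, and compressRef is an illustrative name:

	// compressRef models compress with zeroing-masking: elements whose
	// mask bit is set are packed, in order, into the low lanes of the
	// result, and the remaining lanes are left zero.
	func compressRef(x [4]int32, mask [4]bool) [4]int32 {
		var out [4]int32 // zeroed by default, like the unselected lanes
		n := 0
		for i, keep := range mask {
			if keep {
				out[n] = x[i]
				n++
			}
		}
		return out
	}
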
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 5898406e9d3..88744174300 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -204,6 +204,36 @@
 (CeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask))
 (CeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask))
 (CeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask))
+(CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask))
+(CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM mask))
+(CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM mask))
+(CompressFloat64x2 x mask) => (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM mask))
+(CompressFloat64x4 x mask) => (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM mask))
+(CompressFloat64x8 x mask) => (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM mask))
+(CompressInt8x16 x mask) => (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask))
+(CompressInt8x32 x mask) => (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask))
+(CompressInt8x64 x mask) => (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask))
+(CompressInt16x8 x mask) => (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask))
+(CompressInt16x16 x mask) => (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask))
+(CompressInt16x32 x mask) => (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask))
+(CompressInt32x4 x mask) => (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask))
+(CompressInt32x8 x mask) => (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask))
+(CompressInt32x16 x mask) => (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask))
+(CompressInt64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask))
+(CompressInt64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask))
+(CompressInt64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask))
+(CompressUint8x16 x mask) => (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask))
+(CompressUint8x32 x mask) => (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask))
+(CompressUint8x64 x mask) => (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask))
+(CompressUint16x8 x mask) => (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask))
+(CompressUint16x16 x mask) => (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask))
+(CompressUint16x32 x mask) => (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask))
+(CompressUint32x4 x mask) => (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask))
+(CompressUint32x8 x mask) => (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask))
+(CompressUint32x16 x mask) => (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask))
+(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask))
+(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask))
+(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask))
 (DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
 (DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
 (DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
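These rules lower the portable Compress ops onto the AVX-512 compress instructions, converting the vector-shaped mask into a K register with the matching VPMOVVec*ToM op. At the source level (the 182 lines added to src/simd/ops_amd64.go, not shown in this excerpt), Compress surfaces as a method on each vector type. A usage sketch, assuming it follows the package's existing load/compare/store pattern:

	x := simd.LoadInt32x4Slice([]int32{10, 20, 30, 40})
	y := simd.LoadInt32x4Slice([]int32{15, 15, 35, 35})
	m := x.Greater(y) // selects lanes 1 and 3
	var out [4]int32
	x.Compress(m).StoreSlice(out[:]) // out is now [20, 40, 0, 0]
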
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index 19ac0b0dea6..a7a3c9715c4 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -9,6 +9,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VRCP14PSMasked512", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VRSQRT14PS512", argLength: 1, reg: w11, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VRSQRT14PSMasked512", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VCOMPRESSPSMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VDIVPS512", argLength: 2, reg: w21, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VDIVPSMasked512", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VFMADD213PS512", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true},
@@ -36,6 +37,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VRCP14PSMasked128", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VRSQRTPS128", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VRSQRT14PSMasked128", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VCOMPRESSPSMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VDIVPS128", argLength: 2, reg: v21, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VDIVPSMasked128", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VFMADD213PS128", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
@@ -65,6 +67,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VRCP14PSMasked256", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VRSQRTPS256", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VRSQRT14PSMasked256", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VCOMPRESSPSMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VDIVPS256", argLength: 2, reg: v21, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VDIVPSMasked256", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VFMADD213PS256", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
@@ -94,6 +97,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VRCP14PDMasked128", argLength: 2, reg: wkw, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VRSQRT14PD128", argLength: 1, reg: w11, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VRSQRT14PDMasked128", argLength: 2, reg: wkw, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VCOMPRESSPDMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VDIVPD128", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VDIVPDMasked128", argLength: 3, reg: w2kw, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
@@ -123,6 +127,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VRCP14PDMasked256", argLength: 2, reg: wkw, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VRSQRT14PD256", argLength: 1, reg: w11, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false},
"VRSQRT14PDMasked256", argLength: 2, reg: wkw, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VCOMPRESSPDMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VDIVPD256", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VDIVPDMasked256", argLength: 3, reg: w2kw, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, @@ -151,6 +156,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VRCP14PDMasked512", argLength: 2, reg: wkw, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRSQRT14PD512", argLength: 1, reg: w11, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRSQRT14PDMasked512", argLength: 2, reg: wkw, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VCOMPRESSPDMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VDIVPD512", argLength: 2, reg: w21, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VDIVPDMasked512", argLength: 3, reg: w2kw, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, @@ -175,6 +181,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPABSWMasked256", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDW256", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPADDWMasked256", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPEQW256", argLength: 2, reg: v21, asm: "VPCMPEQW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPCMPGTW256", argLength: 2, reg: v21, asm: "VPCMPGTW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSW256", argLength: 2, reg: v21, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -216,6 +223,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPABSWMasked512", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDWMasked512", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSW512", argLength: 2, reg: w21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSWMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSW512", argLength: 2, reg: w21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -250,6 +258,7 @@ func simdAMD64Ops(v11, v21, v2k, 
@@ -250,6 +258,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPABSWMasked128", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPADDW128", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPADDWMasked128", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPCMPEQW128", argLength: 2, reg: v21, asm: "VPCMPEQW", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPCMPGTW128", argLength: 2, reg: v21, asm: "VPCMPGTW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXSW128", argLength: 2, reg: v21, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -295,6 +304,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPANDDMasked512", argLength: 3, reg: w2kw, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPANDND512", argLength: 2, reg: w21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPANDNDMasked512", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPCOMPRESSDMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXSD512", argLength: 2, reg: w21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXSDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINSD512", argLength: 2, reg: w21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -339,6 +349,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPADDDMasked128", argLength: 3, reg: w2kw, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPANDDMasked128", argLength: 3, reg: w2kw, asm: "VPANDD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPANDNDMasked128", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPCOMPRESSDMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPCMPEQD128", argLength: 2, reg: v21, asm: "VPCMPEQD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPCMPGTD128", argLength: 2, reg: v21, asm: "VPCMPGTD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXSD128", argLength: 2, reg: v21, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -387,6 +398,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPADDDMasked256", argLength: 3, reg: w2kw, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPANDDMasked256", argLength: 3, reg: w2kw, asm: "VPANDD", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPANDNDMasked256", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPCOMPRESSDMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPCMPEQD256", argLength: 2, reg: v21, asm: "VPCMPEQD", commutative: true, typ: "Vec256", resultInArg0: false},
"VPCMPGTD256", argLength: 2, reg: v21, asm: "VPCMPGTD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSD256", argLength: 2, reg: v21, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -435,6 +447,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPADDQMasked128", argLength: 3, reg: w2kw, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDQMasked128", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDNQMasked128", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPCOMPRESSQMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPEQQ128", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPCMPGTQ128", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSQ128", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -472,6 +485,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPADDQMasked256", argLength: 3, reg: w2kw, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDQMasked256", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDNQMasked256", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCOMPRESSQMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPEQQ256", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPCMPGTQ256", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSQ256", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -511,6 +525,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPANDQMasked512", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPANDNQ512", argLength: 2, reg: w21, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPANDNQMasked512", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPCOMPRESSQMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSQ512", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSQ512", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -549,6 +564,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPADDBMasked128", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAND128", argLength: 2, reg: v21, asm: "VPAND", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDN128", argLength: 2, reg: v21, asm: "VPANDN", commutative: false, typ: "Vec128", resultInArg0: 
 		{name: "VPANDN128", argLength: 2, reg: v21, asm: "VPANDN", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPCOMPRESSBMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPCMPEQB128", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPCMPGTB128", argLength: 2, reg: v21, asm: "VPCMPGTB", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXSB128", argLength: 2, reg: v21, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -572,6 +588,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPADDBMasked256", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPAND256", argLength: 2, reg: v21, asm: "VPAND", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPANDN256", argLength: 2, reg: v21, asm: "VPANDN", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPCOMPRESSBMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPCMPEQB256", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPCMPGTB256", argLength: 2, reg: v21, asm: "VPCMPGTB", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPMAXSB256", argLength: 2, reg: v21, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false},
@@ -593,6 +610,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPABSBMasked512", argLength: 2, reg: wkw, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPADDB512", argLength: 2, reg: w21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPADDBMasked512", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
+		{name: "VPCOMPRESSBMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXSB512", argLength: 2, reg: w21, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXSBMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINSB512", argLength: 2, reg: w21, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -657,12 +675,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPMAXUDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINUD512", argLength: 2, reg: w21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINUDMasked512", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
-		{name: "VPERMPS512", argLength: 2, reg: w21, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPERMD512", argLength: 2, reg: w21, asm: "VPERMD", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPERMI2D512", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
+		{name: "VPERMPS512", argLength: 2, reg: w21, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPERMI2PS512", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec512", resultInArg0: true},
"Vec512", resultInArg0: true}, + {name: "VPERMI2D512", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPERMI2PSMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPERMI2DMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPERMPSMasked512", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPERMDMasked512", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLD512", argLength: 2, reg: wfpw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -687,12 +705,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPERMI2PS256", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPERMPSMasked256", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPERMDMasked256", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -706,8 +724,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPERMI2PD128", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPERMI2Q128", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPERMI2QMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPERMI2PDMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPERMI2QMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, 
{name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -719,12 +737,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPERMPD256", argLength: 2, reg: w21, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPERMI2PDMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPERMI2QMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPERMPDMasked256", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPERMQMasked256", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPERMPDMasked256", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -741,8 +759,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPERMI2PD512", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPERMI2QMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPERMI2PDMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index dd27d0cc941..00e4baf141d 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -9,6 
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index dd27d0cc941..00e4baf141d 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -9,6 +9,7 @@ func simdGenericOps() []opData {
 		{name: "ApproximateReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtFloat32x16", argLength: 1, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtMaskedFloat32x16", argLength: 2, commutative: false},
+		{name: "CompressFloat32x16", argLength: 2, commutative: false},
 		{name: "DivFloat32x16", argLength: 2, commutative: false},
 		{name: "DivMaskedFloat32x16", argLength: 3, commutative: false},
 		{name: "EqualFloat32x16", argLength: 2, commutative: true},
@@ -51,6 +52,7 @@ func simdGenericOps() []opData {
 		{name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtMaskedFloat32x4", argLength: 2, commutative: false},
 		{name: "CeilFloat32x4", argLength: 1, commutative: false},
+		{name: "CompressFloat32x4", argLength: 2, commutative: false},
 		{name: "DivFloat32x4", argLength: 2, commutative: false},
 		{name: "DivMaskedFloat32x4", argLength: 3, commutative: false},
 		{name: "DotProdBroadcastFloat32x4", argLength: 2, commutative: true},
@@ -99,6 +101,7 @@ func simdGenericOps() []opData {
 		{name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtMaskedFloat32x8", argLength: 2, commutative: false},
 		{name: "CeilFloat32x8", argLength: 1, commutative: false},
+		{name: "CompressFloat32x8", argLength: 2, commutative: false},
 		{name: "DivFloat32x8", argLength: 2, commutative: false},
 		{name: "DivMaskedFloat32x8", argLength: 3, commutative: false},
 		{name: "DotProdBroadcastFloat32x8", argLength: 2, commutative: true},
@@ -147,6 +150,7 @@ func simdGenericOps() []opData {
 		{name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtMaskedFloat64x2", argLength: 2, commutative: false},
 		{name: "CeilFloat64x2", argLength: 1, commutative: false},
+		{name: "CompressFloat64x2", argLength: 2, commutative: false},
 		{name: "DivFloat64x2", argLength: 2, commutative: false},
 		{name: "DivMaskedFloat64x2", argLength: 3, commutative: false},
 		{name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true},
@@ -195,6 +199,7 @@ func simdGenericOps() []opData {
 		{name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtMaskedFloat64x4", argLength: 2, commutative: false},
 		{name: "CeilFloat64x4", argLength: 1, commutative: false},
+		{name: "CompressFloat64x4", argLength: 2, commutative: false},
 		{name: "DivFloat64x4", argLength: 2, commutative: false},
 		{name: "DivMaskedFloat64x4", argLength: 3, commutative: false},
 		{name: "EqualFloat64x4", argLength: 2, commutative: true},
@@ -240,6 +245,7 @@ func simdGenericOps() []opData {
 		{name: "ApproximateReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtFloat64x8", argLength: 1, commutative: false},
 		{name: "ApproximateReciprocalOfSqrtMaskedFloat64x8", argLength: 2, commutative: false},
+		{name: "CompressFloat64x8", argLength: 2, commutative: false},
 		{name: "DivFloat64x8", argLength: 2, commutative: false},
 		{name: "DivMaskedFloat64x8", argLength: 3, commutative: false},
 		{name: "EqualFloat64x8", argLength: 2, commutative: true},
@@ -280,6 +286,7 @@ func simdGenericOps() []opData {
 		{name: "AddMaskedInt16x16", argLength: 3, commutative: true},
 		{name: "AndInt16x16", argLength: 2, commutative: true},
 		{name: "AndNotInt16x16", argLength: 2, commutative: false},
+		{name: "CompressInt16x16", argLength: 2, commutative: false},
"EqualInt16x16", argLength: 2, commutative: true}, {name: "EqualMaskedInt16x16", argLength: 3, commutative: true}, {name: "GreaterInt16x16", argLength: 2, commutative: false}, @@ -333,6 +340,7 @@ func simdGenericOps() []opData { {name: "AbsoluteMaskedInt16x32", argLength: 2, commutative: false}, {name: "AddInt16x32", argLength: 2, commutative: true}, {name: "AddMaskedInt16x32", argLength: 3, commutative: true}, + {name: "CompressInt16x32", argLength: 2, commutative: false}, {name: "EqualInt16x32", argLength: 2, commutative: true}, {name: "EqualMaskedInt16x32", argLength: 3, commutative: true}, {name: "GreaterInt16x32", argLength: 2, commutative: false}, @@ -381,6 +389,7 @@ func simdGenericOps() []opData { {name: "AddMaskedInt16x8", argLength: 3, commutative: true}, {name: "AndInt16x8", argLength: 2, commutative: true}, {name: "AndNotInt16x8", argLength: 2, commutative: false}, + {name: "CompressInt16x8", argLength: 2, commutative: false}, {name: "EqualInt16x8", argLength: 2, commutative: true}, {name: "EqualMaskedInt16x8", argLength: 3, commutative: true}, {name: "GreaterInt16x8", argLength: 2, commutative: false}, @@ -438,6 +447,7 @@ func simdGenericOps() []opData { {name: "AndMaskedInt32x16", argLength: 3, commutative: true}, {name: "AndNotInt32x16", argLength: 2, commutative: false}, {name: "AndNotMaskedInt32x16", argLength: 3, commutative: false}, + {name: "CompressInt32x16", argLength: 2, commutative: false}, {name: "EqualInt32x16", argLength: 2, commutative: true}, {name: "EqualMaskedInt32x16", argLength: 3, commutative: true}, {name: "GreaterInt32x16", argLength: 2, commutative: false}, @@ -496,6 +506,7 @@ func simdGenericOps() []opData { {name: "AndMaskedInt32x4", argLength: 3, commutative: true}, {name: "AndNotInt32x4", argLength: 2, commutative: false}, {name: "AndNotMaskedInt32x4", argLength: 3, commutative: false}, + {name: "CompressInt32x4", argLength: 2, commutative: false}, {name: "EqualInt32x4", argLength: 2, commutative: true}, {name: "EqualMaskedInt32x4", argLength: 3, commutative: true}, {name: "GreaterInt32x4", argLength: 2, commutative: false}, @@ -558,6 +569,7 @@ func simdGenericOps() []opData { {name: "AndMaskedInt32x8", argLength: 3, commutative: true}, {name: "AndNotInt32x8", argLength: 2, commutative: false}, {name: "AndNotMaskedInt32x8", argLength: 3, commutative: false}, + {name: "CompressInt32x8", argLength: 2, commutative: false}, {name: "EqualInt32x8", argLength: 2, commutative: true}, {name: "EqualMaskedInt32x8", argLength: 3, commutative: true}, {name: "GreaterInt32x8", argLength: 2, commutative: false}, @@ -620,6 +632,7 @@ func simdGenericOps() []opData { {name: "AndMaskedInt64x2", argLength: 3, commutative: true}, {name: "AndNotInt64x2", argLength: 2, commutative: false}, {name: "AndNotMaskedInt64x2", argLength: 3, commutative: false}, + {name: "CompressInt64x2", argLength: 2, commutative: false}, {name: "EqualInt64x2", argLength: 2, commutative: true}, {name: "EqualMaskedInt64x2", argLength: 3, commutative: true}, {name: "GreaterInt64x2", argLength: 2, commutative: false}, @@ -672,6 +685,7 @@ func simdGenericOps() []opData { {name: "AndMaskedInt64x4", argLength: 3, commutative: true}, {name: "AndNotInt64x4", argLength: 2, commutative: false}, {name: "AndNotMaskedInt64x4", argLength: 3, commutative: false}, + {name: "CompressInt64x4", argLength: 2, commutative: false}, {name: "EqualInt64x4", argLength: 2, commutative: true}, {name: "EqualMaskedInt64x4", argLength: 3, commutative: true}, {name: "GreaterInt64x4", argLength: 2, commutative: false}, @@ -724,6 
@@ -724,6 +738,7 @@ func simdGenericOps() []opData {
 		{name: "AndMaskedInt64x8", argLength: 3, commutative: true},
 		{name: "AndNotInt64x8", argLength: 2, commutative: false},
 		{name: "AndNotMaskedInt64x8", argLength: 3, commutative: false},
+		{name: "CompressInt64x8", argLength: 2, commutative: false},
 		{name: "EqualInt64x8", argLength: 2, commutative: true},
 		{name: "EqualMaskedInt64x8", argLength: 3, commutative: true},
 		{name: "GreaterInt64x8", argLength: 2, commutative: false},
@@ -774,6 +789,7 @@ func simdGenericOps() []opData {
 		{name: "AddMaskedInt8x16", argLength: 3, commutative: true},
 		{name: "AndInt8x16", argLength: 2, commutative: true},
 		{name: "AndNotInt8x16", argLength: 2, commutative: false},
+		{name: "CompressInt8x16", argLength: 2, commutative: false},
 		{name: "EqualInt8x16", argLength: 2, commutative: true},
 		{name: "EqualMaskedInt8x16", argLength: 3, commutative: true},
 		{name: "GreaterInt8x16", argLength: 2, commutative: false},
@@ -807,6 +823,7 @@ func simdGenericOps() []opData {
 		{name: "AddMaskedInt8x32", argLength: 3, commutative: true},
 		{name: "AndInt8x32", argLength: 2, commutative: true},
 		{name: "AndNotInt8x32", argLength: 2, commutative: false},
+		{name: "CompressInt8x32", argLength: 2, commutative: false},
 		{name: "EqualInt8x32", argLength: 2, commutative: true},
 		{name: "EqualMaskedInt8x32", argLength: 3, commutative: true},
 		{name: "GreaterInt8x32", argLength: 2, commutative: false},
@@ -838,6 +855,7 @@ func simdGenericOps() []opData {
 		{name: "AbsoluteMaskedInt8x64", argLength: 2, commutative: false},
 		{name: "AddInt8x64", argLength: 2, commutative: true},
 		{name: "AddMaskedInt8x64", argLength: 3, commutative: true},
+		{name: "CompressInt8x64", argLength: 2, commutative: false},
 		{name: "EqualInt8x64", argLength: 2, commutative: true},
 		{name: "EqualMaskedInt8x64", argLength: 3, commutative: true},
 		{name: "GreaterInt8x64", argLength: 2, commutative: false},
@@ -868,6 +886,7 @@ func simdGenericOps() []opData {
 		{name: "AndNotUint16x16", argLength: 2, commutative: false},
 		{name: "AverageUint16x16", argLength: 2, commutative: true},
 		{name: "AverageMaskedUint16x16", argLength: 3, commutative: true},
+		{name: "CompressUint16x16", argLength: 2, commutative: false},
 		{name: "EqualUint16x16", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint16x16", argLength: 3, commutative: true},
 		{name: "GreaterUint16x16", argLength: 2, commutative: false},
@@ -893,10 +912,10 @@ func simdGenericOps() []opData {
 		{name: "PermuteUint16x16", argLength: 2, commutative: false},
 		{name: "Permute2Uint16x16", argLength: 3, commutative: false},
 		{name: "Permute2Int16x16", argLength: 3, commutative: false},
-		{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
 		{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
-		{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
+		{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
 		{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
+		{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
 		{name: "PopCountUint16x16", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
 		{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
@@ -922,6 +941,7 @@ func simdGenericOps() []opData {
 		{name: "AddMaskedUint16x32", argLength: 3, commutative: true},
 		{name: "AverageUint16x32", argLength: 2, commutative: true},
 		{name: "AverageMaskedUint16x32", argLength: 3, commutative: true},
+		{name: "CompressUint16x32", argLength: 2, commutative: false},
{name: "EqualUint16x32", argLength: 2, commutative: true}, {name: "EqualMaskedUint16x32", argLength: 3, commutative: true}, {name: "GreaterUint16x32", argLength: 2, commutative: false}, @@ -940,12 +960,12 @@ func simdGenericOps() []opData { {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true}, {name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true}, - {name: "PermuteUint16x32", argLength: 2, commutative: false}, {name: "PermuteInt16x32", argLength: 2, commutative: false}, + {name: "PermuteUint16x32", argLength: 2, commutative: false}, {name: "Permute2Int16x32", argLength: 3, commutative: false}, {name: "Permute2Uint16x32", argLength: 3, commutative: false}, - {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false}, {name: "Permute2MaskedInt16x32", argLength: 4, commutative: false}, + {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false}, {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false}, {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false}, {name: "PopCountUint16x32", argLength: 1, commutative: false}, @@ -974,6 +994,7 @@ func simdGenericOps() []opData { {name: "AndNotUint16x8", argLength: 2, commutative: false}, {name: "AverageUint16x8", argLength: 2, commutative: true}, {name: "AverageMaskedUint16x8", argLength: 3, commutative: true}, + {name: "CompressUint16x8", argLength: 2, commutative: false}, {name: "EqualUint16x8", argLength: 2, commutative: true}, {name: "EqualMaskedUint16x8", argLength: 3, commutative: true}, {name: "GreaterUint16x8", argLength: 2, commutative: false}, @@ -1030,6 +1051,7 @@ func simdGenericOps() []opData { {name: "AndMaskedUint32x16", argLength: 3, commutative: true}, {name: "AndNotUint32x16", argLength: 2, commutative: false}, {name: "AndNotMaskedUint32x16", argLength: 3, commutative: false}, + {name: "CompressUint32x16", argLength: 2, commutative: false}, {name: "EqualUint32x16", argLength: 2, commutative: true}, {name: "EqualMaskedUint32x16", argLength: 3, commutative: true}, {name: "GreaterUint32x16", argLength: 2, commutative: false}, @@ -1049,17 +1071,17 @@ func simdGenericOps() []opData { {name: "OrUint32x16", argLength: 2, commutative: true}, {name: "OrMaskedUint32x16", argLength: 3, commutative: true}, {name: "PermuteInt32x16", argLength: 2, commutative: false}, - {name: "PermuteUint32x16", argLength: 2, commutative: false}, {name: "PermuteFloat32x16", argLength: 2, commutative: false}, - {name: "Permute2Int32x16", argLength: 3, commutative: false}, + {name: "PermuteUint32x16", argLength: 2, commutative: false}, {name: "Permute2Uint32x16", argLength: 3, commutative: false}, {name: "Permute2Float32x16", argLength: 3, commutative: false}, + {name: "Permute2Int32x16", argLength: 3, commutative: false}, {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false}, {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false}, {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false}, + {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false}, {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false}, {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false}, {name: "PopCountUint32x16", argLength: 1, commutative: false}, {name: "PopCountMaskedUint32x16", argLength: 2, commutative: false}, {name: "RotateLeftUint32x16", argLength: 2, commutative: false}, @@ -1092,6 +1114,7 @@ func simdGenericOps() 
@@ -1092,6 +1114,7 @@ func simdGenericOps() []opData {
 		{name: "AndMaskedUint32x4", argLength: 3, commutative: true},
 		{name: "AndNotUint32x4", argLength: 2, commutative: false},
 		{name: "AndNotMaskedUint32x4", argLength: 3, commutative: false},
+		{name: "CompressUint32x4", argLength: 2, commutative: false},
 		{name: "EqualUint32x4", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint32x4", argLength: 3, commutative: true},
 		{name: "GreaterUint32x4", argLength: 2, commutative: false},
@@ -1114,11 +1137,11 @@ func simdGenericOps() []opData {
 		{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
 		{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
 		{name: "Permute2Uint32x4", argLength: 3, commutative: false},
-		{name: "Permute2Float32x4", argLength: 3, commutative: false},
 		{name: "Permute2Int32x4", argLength: 3, commutative: false},
-		{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
-		{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
+		{name: "Permute2Float32x4", argLength: 3, commutative: false},
 		{name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
+		{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
+		{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
 		{name: "PopCountUint32x4", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
 		{name: "RotateLeftUint32x4", argLength: 2, commutative: false},
@@ -1151,6 +1174,7 @@ func simdGenericOps() []opData {
 		{name: "AndMaskedUint32x8", argLength: 3, commutative: true},
 		{name: "AndNotUint32x8", argLength: 2, commutative: false},
 		{name: "AndNotMaskedUint32x8", argLength: 3, commutative: false},
+		{name: "CompressUint32x8", argLength: 2, commutative: false},
 		{name: "EqualUint32x8", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint32x8", argLength: 3, commutative: true},
 		{name: "GreaterUint32x8", argLength: 2, commutative: false},
@@ -1172,18 +1196,18 @@ func simdGenericOps() []opData {
 		{name: "OrMaskedUint32x8", argLength: 3, commutative: true},
 		{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
 		{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
+		{name: "PermuteUint32x8", argLength: 2, commutative: false},
 		{name: "PermuteInt32x8", argLength: 2, commutative: false},
 		{name: "PermuteFloat32x8", argLength: 2, commutative: false},
-		{name: "PermuteUint32x8", argLength: 2, commutative: false},
 		{name: "Permute2Uint32x8", argLength: 3, commutative: false},
 		{name: "Permute2Float32x8", argLength: 3, commutative: false},
 		{name: "Permute2Int32x8", argLength: 3, commutative: false},
 		{name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
-		{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
 		{name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
+		{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
 		{name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
-		{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
 		{name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
+		{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
 		{name: "PopCountUint32x8", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint32x8", argLength: 2, commutative: false},
 		{name: "RotateLeftUint32x8", argLength: 2, commutative: false},
@@ -1216,6 +1240,7 @@ func simdGenericOps() []opData {
 		{name: "AndMaskedUint64x2", argLength: 3, commutative: true},
 		{name: "AndNotUint64x2", argLength: 2, commutative: false},
 		{name: "AndNotMaskedUint64x2", argLength: 3, commutative: false},
+		{name: "CompressUint64x2", argLength: 2, commutative: false},
 		{name: "EqualUint64x2", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint64x2", argLength: 3, commutative: true},
 		{name: "GreaterUint64x2", argLength: 2, commutative: false},
@@ -1236,11 +1261,11 @@ func simdGenericOps() []opData {
 		{name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
 		{name: "OrUint64x2", argLength: 2, commutative: true},
 		{name: "OrMaskedUint64x2", argLength: 3, commutative: true},
+		{name: "Permute2Float64x2", argLength: 3, commutative: false},
 		{name: "Permute2Uint64x2", argLength: 3, commutative: false},
 		{name: "Permute2Int64x2", argLength: 3, commutative: false},
-		{name: "Permute2Float64x2", argLength: 3, commutative: false},
-		{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
 		{name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
+		{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
 		{name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
 		{name: "PopCountUint64x2", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
@@ -1270,6 +1295,7 @@ func simdGenericOps() []opData {
 		{name: "AndMaskedUint64x4", argLength: 3, commutative: true},
 		{name: "AndNotUint64x4", argLength: 2, commutative: false},
 		{name: "AndNotMaskedUint64x4", argLength: 3, commutative: false},
+		{name: "CompressUint64x4", argLength: 2, commutative: false},
 		{name: "EqualUint64x4", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint64x4", argLength: 3, commutative: true},
 		{name: "GreaterUint64x4", argLength: 2, commutative: false},
@@ -1290,18 +1316,18 @@ func simdGenericOps() []opData {
 		{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
 		{name: "OrUint64x4", argLength: 2, commutative: true},
 		{name: "OrMaskedUint64x4", argLength: 3, commutative: true},
+		{name: "PermuteFloat64x4", argLength: 2, commutative: false},
 		{name: "PermuteUint64x4", argLength: 2, commutative: false},
 		{name: "PermuteInt64x4", argLength: 2, commutative: false},
-		{name: "PermuteFloat64x4", argLength: 2, commutative: false},
-		{name: "Permute2Uint64x4", argLength: 3, commutative: false},
 		{name: "Permute2Int64x4", argLength: 3, commutative: false},
+		{name: "Permute2Uint64x4", argLength: 3, commutative: false},
 		{name: "Permute2Float64x4", argLength: 3, commutative: false},
-		{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
-		{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
 		{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
+		{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
+		{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
 		{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
-		{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
 		{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
+		{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
 		{name: "PopCountUint64x4", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
 		{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
@@ -1330,6 +1356,7 @@ func simdGenericOps() []opData {
 		{name: "AndMaskedUint64x8", argLength: 3, commutative: true},
 		{name: "AndNotUint64x8", argLength: 2, commutative: false},
 		{name: "AndNotMaskedUint64x8", argLength: 3, commutative: false},
+		{name: "CompressUint64x8", argLength: 2, commutative: false},
 		{name: "EqualUint64x8", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint64x8", argLength: 3, commutative: true},
 		{name: "GreaterUint64x8", argLength: 2, commutative: false},
@@ -1350,18 +1377,18 @@ func simdGenericOps() []opData {
 		{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
 		{name: "OrUint64x8", argLength: 2, commutative: true},
 		{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
-		{name: "PermuteUint64x8", argLength: 2, commutative: false},
 		{name: "PermuteInt64x8", argLength: 2, commutative: false},
+		{name: "PermuteUint64x8", argLength: 2, commutative: false},
 		{name: "PermuteFloat64x8", argLength: 2, commutative: false},
-		{name: "Permute2Int64x8", argLength: 3, commutative: false},
 		{name: "Permute2Uint64x8", argLength: 3, commutative: false},
 		{name: "Permute2Float64x8", argLength: 3, commutative: false},
+		{name: "Permute2Int64x8", argLength: 3, commutative: false},
 		{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
-		{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
 		{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
-		{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
-		{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
+		{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
 		{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
+		{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
+		{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
 		{name: "PopCountUint64x8", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
 		{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
@@ -1390,6 +1417,7 @@ func simdGenericOps() []opData {
 		{name: "AndNotUint8x16", argLength: 2, commutative: false},
 		{name: "AverageUint8x16", argLength: 2, commutative: true},
 		{name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
+		{name: "CompressUint8x16", argLength: 2, commutative: false},
 		{name: "EqualUint8x16", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint8x16", argLength: 3, commutative: true},
 		{name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false},
@@ -1411,12 +1439,12 @@ func simdGenericOps() []opData {
 		{name: "OrUint8x16", argLength: 2, commutative: true},
 		{name: "PermuteUint8x16", argLength: 2, commutative: false},
 		{name: "PermuteInt8x16", argLength: 2, commutative: false},
-		{name: "Permute2Uint8x16", argLength: 3, commutative: false},
 		{name: "Permute2Int8x16", argLength: 3, commutative: false},
+		{name: "Permute2Uint8x16", argLength: 3, commutative: false},
 		{name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
 		{name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
-		{name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
 		{name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
+		{name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
 		{name: "PopCountUint8x16", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint8x16", argLength: 2, commutative: false},
 		{name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
@@ -1434,6 +1462,7 @@ func simdGenericOps() []opData {
 		{name: "AndNotUint8x32", argLength: 2, commutative: false},
 		{name: "AverageUint8x32", argLength: 2, commutative: true},
 		{name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
+		{name: "CompressUint8x32", argLength: 2, commutative: false},
 		{name: "EqualUint8x32", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint8x32", argLength: 3, commutative: true},
 		{name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false},
@@ -1457,10 +1486,10 @@ func simdGenericOps() []opData {
 		{name: "PermuteInt8x32", argLength: 2, commutative: false},
 		{name: "Permute2Int8x32", argLength: 3, commutative: false},
 		{name: "Permute2Uint8x32", argLength: 3, commutative: false},
-		{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
 		{name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
-		{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
+		{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
 		{name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
+		{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
 		{name: "PopCountUint8x32", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
 		{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
@@ -1476,6 +1505,7 @@ func simdGenericOps() []opData {
 		{name: "AddMaskedUint8x64", argLength: 3, commutative: true},
 		{name: "AverageUint8x64", argLength: 2, commutative: true},
 		{name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
+		{name: "CompressUint8x64", argLength: 2, commutative: false},
 		{name: "EqualUint8x64", argLength: 2, commutative: true},
 		{name: "EqualMaskedUint8x64", argLength: 3, commutative: true},
 		{name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false},
@@ -1494,14 +1524,14 @@ func simdGenericOps() []opData {
 		{name: "MinMaskedUint8x64", argLength: 3, commutative: true},
 		{name: "NotEqualUint8x64", argLength: 2, commutative: true},
 		{name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true},
-		{name: "PermuteUint8x64", argLength: 2, commutative: false},
 		{name: "PermuteInt8x64", argLength: 2, commutative: false},
-		{name: "Permute2Int8x64", argLength: 3, commutative: false},
+		{name: "PermuteUint8x64", argLength: 2, commutative: false},
 		{name: "Permute2Uint8x64", argLength: 3, commutative: false},
+		{name: "Permute2Int8x64", argLength: 3, commutative: false},
 		{name: "Permute2MaskedUint8x64", argLength: 4, commutative: false},
 		{name: "Permute2MaskedInt8x64", argLength: 4, commutative: false},
-		{name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
 		{name: "PermuteMaskedUint8x64", argLength: 3, commutative: false},
+		{name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
 		{name: "PopCountUint8x64", argLength: 1, commutative: false},
 		{name: "PopCountMaskedUint8x64", argLength: 2, commutative: false},
 		{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
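The remaining files in the diffstat pair each generic op with its AMD64 lowering. The 540 lines added to src/cmd/compile/internal/ssa/rewriteAMD64.go are not shown in this excerpt, but matchers generated from rules like (CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask)) conventionally take this shape (a sketch, not the generated text itself):

	func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool {
		v_1 := v.Args[1]
		v_0 := v.Args[0]
		b := v.Block
		// match: (CompressFloat32x4 x mask)
		// result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask))
		for {
			x := v_0
			mask := v_1
			v.reset(OpAMD64VCOMPRESSPSMasked128)
			v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
			v0.AddArg(mask)
			v.AddArg2(x, v0)
			return true
		}
	}
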
OpAMD64VDIVPDMasked128 OpAMD64VFMADD213PD128 @@ -1318,6 +1322,7 @@ const ( OpAMD64VRCP14PDMasked256 OpAMD64VRSQRT14PD256 OpAMD64VRSQRT14PDMasked256 + OpAMD64VCOMPRESSPDMasked256 OpAMD64VDIVPD256 OpAMD64VDIVPDMasked256 OpAMD64VFMADD213PD256 @@ -1346,6 +1351,7 @@ const ( OpAMD64VRCP14PDMasked512 OpAMD64VRSQRT14PD512 OpAMD64VRSQRT14PDMasked512 + OpAMD64VCOMPRESSPDMasked512 OpAMD64VDIVPD512 OpAMD64VDIVPDMasked512 OpAMD64VFMADD213PD512 @@ -1370,6 +1376,7 @@ const ( OpAMD64VPABSWMasked256 OpAMD64VPADDW256 OpAMD64VPADDWMasked256 + OpAMD64VPCOMPRESSWMasked256 OpAMD64VPCMPEQW256 OpAMD64VPCMPGTW256 OpAMD64VPMAXSW256 @@ -1411,6 +1418,7 @@ const ( OpAMD64VPABSWMasked512 OpAMD64VPADDW512 OpAMD64VPADDWMasked512 + OpAMD64VPCOMPRESSWMasked512 OpAMD64VPMAXSW512 OpAMD64VPMAXSWMasked512 OpAMD64VPMINSW512 @@ -1445,6 +1453,7 @@ const ( OpAMD64VPABSWMasked128 OpAMD64VPADDW128 OpAMD64VPADDWMasked128 + OpAMD64VPCOMPRESSWMasked128 OpAMD64VPCMPEQW128 OpAMD64VPCMPGTW128 OpAMD64VPMAXSW128 @@ -1490,6 +1499,7 @@ const ( OpAMD64VPANDDMasked512 OpAMD64VPANDND512 OpAMD64VPANDNDMasked512 + OpAMD64VPCOMPRESSDMasked512 OpAMD64VPMAXSD512 OpAMD64VPMAXSDMasked512 OpAMD64VPMINSD512 @@ -1534,6 +1544,7 @@ const ( OpAMD64VPADDDMasked128 OpAMD64VPANDDMasked128 OpAMD64VPANDNDMasked128 + OpAMD64VPCOMPRESSDMasked128 OpAMD64VPCMPEQD128 OpAMD64VPCMPGTD128 OpAMD64VPMAXSD128 @@ -1582,6 +1593,7 @@ const ( OpAMD64VPADDDMasked256 OpAMD64VPANDDMasked256 OpAMD64VPANDNDMasked256 + OpAMD64VPCOMPRESSDMasked256 OpAMD64VPCMPEQD256 OpAMD64VPCMPGTD256 OpAMD64VPMAXSD256 @@ -1630,6 +1642,7 @@ const ( OpAMD64VPADDQMasked128 OpAMD64VPANDQMasked128 OpAMD64VPANDNQMasked128 + OpAMD64VPCOMPRESSQMasked128 OpAMD64VPCMPEQQ128 OpAMD64VPCMPGTQ128 OpAMD64VPMAXSQ128 @@ -1667,6 +1680,7 @@ const ( OpAMD64VPADDQMasked256 OpAMD64VPANDQMasked256 OpAMD64VPANDNQMasked256 + OpAMD64VPCOMPRESSQMasked256 OpAMD64VPCMPEQQ256 OpAMD64VPCMPGTQ256 OpAMD64VPMAXSQ256 @@ -1706,6 +1720,7 @@ const ( OpAMD64VPANDQMasked512 OpAMD64VPANDNQ512 OpAMD64VPANDNQMasked512 + OpAMD64VPCOMPRESSQMasked512 OpAMD64VPMAXSQ512 OpAMD64VPMAXSQMasked512 OpAMD64VPMINSQ512 @@ -1744,6 +1759,7 @@ const ( OpAMD64VPADDBMasked128 OpAMD64VPAND128 OpAMD64VPANDN128 + OpAMD64VPCOMPRESSBMasked128 OpAMD64VPCMPEQB128 OpAMD64VPCMPGTB128 OpAMD64VPMAXSB128 @@ -1767,6 +1783,7 @@ const ( OpAMD64VPADDBMasked256 OpAMD64VPAND256 OpAMD64VPANDN256 + OpAMD64VPCOMPRESSBMasked256 OpAMD64VPCMPEQB256 OpAMD64VPCMPGTB256 OpAMD64VPMAXSB256 @@ -1788,6 +1805,7 @@ const ( OpAMD64VPABSBMasked512 OpAMD64VPADDB512 OpAMD64VPADDBMasked512 + OpAMD64VPCOMPRESSBMasked512 OpAMD64VPMAXSB512 OpAMD64VPMAXSBMasked512 OpAMD64VPMINSB512 @@ -1852,12 +1870,12 @@ const ( OpAMD64VPMAXUDMasked512 OpAMD64VPMINUD512 OpAMD64VPMINUDMasked512 - OpAMD64VPERMPS512 OpAMD64VPERMD512 - OpAMD64VPERMI2D512 + OpAMD64VPERMPS512 OpAMD64VPERMI2PS512 - OpAMD64VPERMI2DMasked512 + OpAMD64VPERMI2D512 OpAMD64VPERMI2PSMasked512 + OpAMD64VPERMI2DMasked512 OpAMD64VPERMPSMasked512 OpAMD64VPERMDMasked512 OpAMD64VPSRLD512 @@ -1882,12 +1900,12 @@ const ( OpAMD64VPMINUD256 OpAMD64VPMINUDMasked256 OpAMD64VPMULUDQ256 - OpAMD64VPERMD256 OpAMD64VPERMPS256 + OpAMD64VPERMD256 OpAMD64VPERMI2D256 OpAMD64VPERMI2PS256 - OpAMD64VPERMI2PSMasked256 OpAMD64VPERMI2DMasked256 + OpAMD64VPERMI2PSMasked256 OpAMD64VPERMPSMasked256 OpAMD64VPERMDMasked256 OpAMD64VPSRLD256 @@ -1901,8 +1919,8 @@ const ( OpAMD64VPMULUDQMasked128 OpAMD64VPERMI2PD128 OpAMD64VPERMI2Q128 - OpAMD64VPERMI2QMasked128 OpAMD64VPERMI2PDMasked128 + OpAMD64VPERMI2QMasked128 OpAMD64VPSRLQ128 OpAMD64VPSRLQMasked128 OpAMD64VPSRLVQ128 @@ -1914,12 
+1932,12 @@ const ( OpAMD64VPMULUDQMasked256 OpAMD64VPERMQ256 OpAMD64VPERMPD256 - OpAMD64VPERMI2PD256 OpAMD64VPERMI2Q256 + OpAMD64VPERMI2PD256 OpAMD64VPERMI2PDMasked256 OpAMD64VPERMI2QMasked256 - OpAMD64VPERMPDMasked256 OpAMD64VPERMQMasked256 + OpAMD64VPERMPDMasked256 OpAMD64VPSRLQ256 OpAMD64VPSRLQMasked256 OpAMD64VPSRLVQ256 @@ -1936,8 +1954,8 @@ const ( OpAMD64VPERMI2PD512 OpAMD64VPERMI2QMasked512 OpAMD64VPERMI2PDMasked512 - OpAMD64VPERMPDMasked512 OpAMD64VPERMQMasked512 + OpAMD64VPERMPDMasked512 OpAMD64VPSRLQ512 OpAMD64VPSRLQMasked512 OpAMD64VPSRLVQ512 @@ -4391,6 +4409,7 @@ const ( OpApproximateReciprocalMaskedFloat32x16 OpApproximateReciprocalOfSqrtFloat32x16 OpApproximateReciprocalOfSqrtMaskedFloat32x16 + OpCompressFloat32x16 OpDivFloat32x16 OpDivMaskedFloat32x16 OpEqualFloat32x16 @@ -4433,6 +4452,7 @@ const ( OpApproximateReciprocalOfSqrtFloat32x4 OpApproximateReciprocalOfSqrtMaskedFloat32x4 OpCeilFloat32x4 + OpCompressFloat32x4 OpDivFloat32x4 OpDivMaskedFloat32x4 OpDotProdBroadcastFloat32x4 @@ -4481,6 +4501,7 @@ const ( OpApproximateReciprocalOfSqrtFloat32x8 OpApproximateReciprocalOfSqrtMaskedFloat32x8 OpCeilFloat32x8 + OpCompressFloat32x8 OpDivFloat32x8 OpDivMaskedFloat32x8 OpDotProdBroadcastFloat32x8 @@ -4529,6 +4550,7 @@ const ( OpApproximateReciprocalOfSqrtFloat64x2 OpApproximateReciprocalOfSqrtMaskedFloat64x2 OpCeilFloat64x2 + OpCompressFloat64x2 OpDivFloat64x2 OpDivMaskedFloat64x2 OpDotProdBroadcastFloat64x2 @@ -4577,6 +4599,7 @@ const ( OpApproximateReciprocalOfSqrtFloat64x4 OpApproximateReciprocalOfSqrtMaskedFloat64x4 OpCeilFloat64x4 + OpCompressFloat64x4 OpDivFloat64x4 OpDivMaskedFloat64x4 OpEqualFloat64x4 @@ -4622,6 +4645,7 @@ const ( OpApproximateReciprocalMaskedFloat64x8 OpApproximateReciprocalOfSqrtFloat64x8 OpApproximateReciprocalOfSqrtMaskedFloat64x8 + OpCompressFloat64x8 OpDivFloat64x8 OpDivMaskedFloat64x8 OpEqualFloat64x8 @@ -4662,6 +4686,7 @@ const ( OpAddMaskedInt16x16 OpAndInt16x16 OpAndNotInt16x16 + OpCompressInt16x16 OpEqualInt16x16 OpEqualMaskedInt16x16 OpGreaterInt16x16 @@ -4715,6 +4740,7 @@ const ( OpAbsoluteMaskedInt16x32 OpAddInt16x32 OpAddMaskedInt16x32 + OpCompressInt16x32 OpEqualInt16x32 OpEqualMaskedInt16x32 OpGreaterInt16x32 @@ -4763,6 +4789,7 @@ const ( OpAddMaskedInt16x8 OpAndInt16x8 OpAndNotInt16x8 + OpCompressInt16x8 OpEqualInt16x8 OpEqualMaskedInt16x8 OpGreaterInt16x8 @@ -4820,6 +4847,7 @@ const ( OpAndMaskedInt32x16 OpAndNotInt32x16 OpAndNotMaskedInt32x16 + OpCompressInt32x16 OpEqualInt32x16 OpEqualMaskedInt32x16 OpGreaterInt32x16 @@ -4878,6 +4906,7 @@ const ( OpAndMaskedInt32x4 OpAndNotInt32x4 OpAndNotMaskedInt32x4 + OpCompressInt32x4 OpEqualInt32x4 OpEqualMaskedInt32x4 OpGreaterInt32x4 @@ -4940,6 +4969,7 @@ const ( OpAndMaskedInt32x8 OpAndNotInt32x8 OpAndNotMaskedInt32x8 + OpCompressInt32x8 OpEqualInt32x8 OpEqualMaskedInt32x8 OpGreaterInt32x8 @@ -5002,6 +5032,7 @@ const ( OpAndMaskedInt64x2 OpAndNotInt64x2 OpAndNotMaskedInt64x2 + OpCompressInt64x2 OpEqualInt64x2 OpEqualMaskedInt64x2 OpGreaterInt64x2 @@ -5054,6 +5085,7 @@ const ( OpAndMaskedInt64x4 OpAndNotInt64x4 OpAndNotMaskedInt64x4 + OpCompressInt64x4 OpEqualInt64x4 OpEqualMaskedInt64x4 OpGreaterInt64x4 @@ -5106,6 +5138,7 @@ const ( OpAndMaskedInt64x8 OpAndNotInt64x8 OpAndNotMaskedInt64x8 + OpCompressInt64x8 OpEqualInt64x8 OpEqualMaskedInt64x8 OpGreaterInt64x8 @@ -5156,6 +5189,7 @@ const ( OpAddMaskedInt8x16 OpAndInt8x16 OpAndNotInt8x16 + OpCompressInt8x16 OpEqualInt8x16 OpEqualMaskedInt8x16 OpGreaterInt8x16 @@ -5189,6 +5223,7 @@ const ( OpAddMaskedInt8x32 OpAndInt8x32 OpAndNotInt8x32 + 
OpCompressInt8x32 OpEqualInt8x32 OpEqualMaskedInt8x32 OpGreaterInt8x32 @@ -5220,6 +5255,7 @@ const ( OpAbsoluteMaskedInt8x64 OpAddInt8x64 OpAddMaskedInt8x64 + OpCompressInt8x64 OpEqualInt8x64 OpEqualMaskedInt8x64 OpGreaterInt8x64 @@ -5250,6 +5286,7 @@ const ( OpAndNotUint16x16 OpAverageUint16x16 OpAverageMaskedUint16x16 + OpCompressUint16x16 OpEqualUint16x16 OpEqualMaskedUint16x16 OpGreaterUint16x16 @@ -5275,10 +5312,10 @@ const ( OpPermuteUint16x16 OpPermute2Uint16x16 OpPermute2Int16x16 - OpPermute2MaskedUint16x16 OpPermute2MaskedInt16x16 - OpPermuteMaskedUint16x16 + OpPermute2MaskedUint16x16 OpPermuteMaskedInt16x16 + OpPermuteMaskedUint16x16 OpPopCountUint16x16 OpPopCountMaskedUint16x16 OpSaturatedAddUint16x16 @@ -5304,6 +5341,7 @@ const ( OpAddMaskedUint16x32 OpAverageUint16x32 OpAverageMaskedUint16x32 + OpCompressUint16x32 OpEqualUint16x32 OpEqualMaskedUint16x32 OpGreaterUint16x32 @@ -5322,12 +5360,12 @@ const ( OpMulHighMaskedUint16x32 OpNotEqualUint16x32 OpNotEqualMaskedUint16x32 - OpPermuteUint16x32 OpPermuteInt16x32 + OpPermuteUint16x32 OpPermute2Int16x32 OpPermute2Uint16x32 - OpPermute2MaskedUint16x32 OpPermute2MaskedInt16x32 + OpPermute2MaskedUint16x32 OpPermuteMaskedUint16x32 OpPermuteMaskedInt16x32 OpPopCountUint16x32 @@ -5356,6 +5394,7 @@ const ( OpAndNotUint16x8 OpAverageUint16x8 OpAverageMaskedUint16x8 + OpCompressUint16x8 OpEqualUint16x8 OpEqualMaskedUint16x8 OpGreaterUint16x8 @@ -5412,6 +5451,7 @@ const ( OpAndMaskedUint32x16 OpAndNotUint32x16 OpAndNotMaskedUint32x16 + OpCompressUint32x16 OpEqualUint32x16 OpEqualMaskedUint32x16 OpGreaterUint32x16 @@ -5431,17 +5471,17 @@ const ( OpOrUint32x16 OpOrMaskedUint32x16 OpPermuteInt32x16 - OpPermuteUint32x16 OpPermuteFloat32x16 - OpPermute2Int32x16 + OpPermuteUint32x16 OpPermute2Uint32x16 OpPermute2Float32x16 + OpPermute2Int32x16 OpPermute2MaskedUint32x16 OpPermute2MaskedInt32x16 OpPermute2MaskedFloat32x16 + OpPermuteMaskedFloat32x16 OpPermuteMaskedUint32x16 OpPermuteMaskedInt32x16 - OpPermuteMaskedFloat32x16 OpPopCountUint32x16 OpPopCountMaskedUint32x16 OpRotateLeftUint32x16 @@ -5474,6 +5514,7 @@ const ( OpAndMaskedUint32x4 OpAndNotUint32x4 OpAndNotMaskedUint32x4 + OpCompressUint32x4 OpEqualUint32x4 OpEqualMaskedUint32x4 OpGreaterUint32x4 @@ -5496,11 +5537,11 @@ const ( OpPairwiseAddUint32x4 OpPairwiseSubUint32x4 OpPermute2Uint32x4 - OpPermute2Float32x4 OpPermute2Int32x4 - OpPermute2MaskedUint32x4 - OpPermute2MaskedInt32x4 + OpPermute2Float32x4 OpPermute2MaskedFloat32x4 + OpPermute2MaskedInt32x4 + OpPermute2MaskedUint32x4 OpPopCountUint32x4 OpPopCountMaskedUint32x4 OpRotateLeftUint32x4 @@ -5533,6 +5574,7 @@ const ( OpAndMaskedUint32x8 OpAndNotUint32x8 OpAndNotMaskedUint32x8 + OpCompressUint32x8 OpEqualUint32x8 OpEqualMaskedUint32x8 OpGreaterUint32x8 @@ -5554,18 +5596,18 @@ const ( OpOrMaskedUint32x8 OpPairwiseAddUint32x8 OpPairwiseSubUint32x8 + OpPermuteUint32x8 OpPermuteInt32x8 OpPermuteFloat32x8 - OpPermuteUint32x8 OpPermute2Uint32x8 OpPermute2Float32x8 OpPermute2Int32x8 OpPermute2MaskedFloat32x8 - OpPermute2MaskedUint32x8 OpPermute2MaskedInt32x8 + OpPermute2MaskedUint32x8 OpPermuteMaskedInt32x8 - OpPermuteMaskedFloat32x8 OpPermuteMaskedUint32x8 + OpPermuteMaskedFloat32x8 OpPopCountUint32x8 OpPopCountMaskedUint32x8 OpRotateLeftUint32x8 @@ -5598,6 +5640,7 @@ const ( OpAndMaskedUint64x2 OpAndNotUint64x2 OpAndNotMaskedUint64x2 + OpCompressUint64x2 OpEqualUint64x2 OpEqualMaskedUint64x2 OpGreaterUint64x2 @@ -5618,11 +5661,11 @@ const ( OpNotEqualMaskedUint64x2 OpOrUint64x2 OpOrMaskedUint64x2 + OpPermute2Float64x2 OpPermute2Uint64x2 
OpPermute2Int64x2 - OpPermute2Float64x2 - OpPermute2MaskedUint64x2 OpPermute2MaskedInt64x2 + OpPermute2MaskedUint64x2 OpPermute2MaskedFloat64x2 OpPopCountUint64x2 OpPopCountMaskedUint64x2 @@ -5652,6 +5695,7 @@ const ( OpAndMaskedUint64x4 OpAndNotUint64x4 OpAndNotMaskedUint64x4 + OpCompressUint64x4 OpEqualUint64x4 OpEqualMaskedUint64x4 OpGreaterUint64x4 @@ -5672,18 +5716,18 @@ const ( OpNotEqualMaskedUint64x4 OpOrUint64x4 OpOrMaskedUint64x4 + OpPermuteFloat64x4 OpPermuteUint64x4 OpPermuteInt64x4 - OpPermuteFloat64x4 - OpPermute2Uint64x4 OpPermute2Int64x4 + OpPermute2Uint64x4 OpPermute2Float64x4 - OpPermute2MaskedInt64x4 - OpPermute2MaskedUint64x4 OpPermute2MaskedFloat64x4 + OpPermute2MaskedUint64x4 + OpPermute2MaskedInt64x4 OpPermuteMaskedFloat64x4 - OpPermuteMaskedInt64x4 OpPermuteMaskedUint64x4 + OpPermuteMaskedInt64x4 OpPopCountUint64x4 OpPopCountMaskedUint64x4 OpRotateLeftUint64x4 @@ -5712,6 +5756,7 @@ const ( OpAndMaskedUint64x8 OpAndNotUint64x8 OpAndNotMaskedUint64x8 + OpCompressUint64x8 OpEqualUint64x8 OpEqualMaskedUint64x8 OpGreaterUint64x8 @@ -5732,18 +5777,18 @@ const ( OpNotEqualMaskedUint64x8 OpOrUint64x8 OpOrMaskedUint64x8 - OpPermuteUint64x8 OpPermuteInt64x8 + OpPermuteUint64x8 OpPermuteFloat64x8 - OpPermute2Int64x8 OpPermute2Uint64x8 OpPermute2Float64x8 + OpPermute2Int64x8 OpPermute2MaskedUint64x8 - OpPermute2MaskedInt64x8 OpPermute2MaskedFloat64x8 - OpPermuteMaskedFloat64x8 - OpPermuteMaskedInt64x8 + OpPermute2MaskedInt64x8 OpPermuteMaskedUint64x8 + OpPermuteMaskedInt64x8 + OpPermuteMaskedFloat64x8 OpPopCountUint64x8 OpPopCountMaskedUint64x8 OpRotateLeftUint64x8 @@ -5772,6 +5817,7 @@ const ( OpAndNotUint8x16 OpAverageUint8x16 OpAverageMaskedUint8x16 + OpCompressUint8x16 OpEqualUint8x16 OpEqualMaskedUint8x16 OpGaloisFieldMulUint8x16 @@ -5793,12 +5839,12 @@ const ( OpOrUint8x16 OpPermuteUint8x16 OpPermuteInt8x16 - OpPermute2Uint8x16 OpPermute2Int8x16 + OpPermute2Uint8x16 OpPermute2MaskedInt8x16 OpPermute2MaskedUint8x16 - OpPermuteMaskedInt8x16 OpPermuteMaskedUint8x16 + OpPermuteMaskedInt8x16 OpPopCountUint8x16 OpPopCountMaskedUint8x16 OpSaturatedAddUint8x16 @@ -5816,6 +5862,7 @@ const ( OpAndNotUint8x32 OpAverageUint8x32 OpAverageMaskedUint8x32 + OpCompressUint8x32 OpEqualUint8x32 OpEqualMaskedUint8x32 OpGaloisFieldMulUint8x32 @@ -5839,10 +5886,10 @@ const ( OpPermuteInt8x32 OpPermute2Int8x32 OpPermute2Uint8x32 - OpPermute2MaskedUint8x32 OpPermute2MaskedInt8x32 - OpPermuteMaskedUint8x32 + OpPermute2MaskedUint8x32 OpPermuteMaskedInt8x32 + OpPermuteMaskedUint8x32 OpPopCountUint8x32 OpPopCountMaskedUint8x32 OpSaturatedAddUint8x32 @@ -5858,6 +5905,7 @@ const ( OpAddMaskedUint8x64 OpAverageUint8x64 OpAverageMaskedUint8x64 + OpCompressUint8x64 OpEqualUint8x64 OpEqualMaskedUint8x64 OpGaloisFieldMulUint8x64 @@ -5876,14 +5924,14 @@ const ( OpMinMaskedUint8x64 OpNotEqualUint8x64 OpNotEqualMaskedUint8x64 - OpPermuteUint8x64 OpPermuteInt8x64 - OpPermute2Int8x64 + OpPermuteUint8x64 OpPermute2Uint8x64 + OpPermute2Int8x64 OpPermute2MaskedUint8x64 OpPermute2MaskedInt8x64 - OpPermuteMaskedInt8x64 OpPermuteMaskedUint8x64 + OpPermuteMaskedInt8x64 OpPopCountUint8x64 OpPopCountMaskedUint8x64 OpSaturatedAddUint8x64 @@ -18850,6 +18898,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VCOMPRESSPSMasked512", + argLen: 2, + asm: x86.AVCOMPRESSPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 
X12 X13 X14 + }, + }, + }, { name: "VDIVPS512", argLen: 2, @@ -19255,6 +19317,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VCOMPRESSPSMasked128", + argLen: 2, + asm: x86.AVCOMPRESSPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VDIVPS128", argLen: 2, @@ -19688,6 +19764,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VCOMPRESSPSMasked256", + argLen: 2, + asm: x86.AVCOMPRESSPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VDIVPS256", argLen: 2, @@ -20121,6 +20211,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VCOMPRESSPDMasked128", + argLen: 2, + asm: x86.AVCOMPRESSPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VDIVPD128", argLen: 2, @@ -20554,6 +20658,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VCOMPRESSPDMasked256", + argLen: 2, + asm: x86.AVCOMPRESSPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VDIVPD256", argLen: 2, @@ -20973,6 +21091,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VCOMPRESSPDMasked512", + argLen: 2, + asm: x86.AVCOMPRESSPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VDIVPD512", argLen: 2, @@ -21337,6 +21469,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSWMasked256", + argLen: 2, + asm: x86.AVPCOMPRESSW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQW256", argLen: 2, @@ -21945,6 +22091,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSWMasked512", + argLen: 2, + asm: x86.AVPCOMPRESSW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXSW512", argLen: 2, @@ -22454,6 +22614,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSWMasked128", + argLen: 2, + asm: x86.AVPCOMPRESSW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, 
// X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQW128", argLen: 2, @@ -23122,6 +23296,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSDMasked512", + argLen: 2, + asm: x86.AVPCOMPRESSD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXSD512", argLen: 2, @@ -23794,6 +23982,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSDMasked128", + argLen: 2, + asm: x86.AVPCOMPRESSD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQD128", argLen: 2, @@ -24522,6 +24724,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSDMasked256", + argLen: 2, + asm: x86.AVPCOMPRESSD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQD256", argLen: 2, @@ -25250,6 +25466,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSQMasked128", + argLen: 2, + asm: x86.AVPCOMPRESSQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQQ128", argLen: 2, @@ -25805,6 +26035,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSQMasked256", + argLen: 2, + asm: x86.AVPCOMPRESSQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQQ256", argLen: 2, @@ -26389,6 +26633,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSQMasked512", + argLen: 2, + asm: x86.AVPCOMPRESSQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXSQ512", argLen: 2, @@ -26958,6 +27216,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSBMasked128", + argLen: 2, + asm: x86.AVPCOMPRESSB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQB128", argLen: 2, @@ -27296,6 +27568,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSBMasked256", + argLen: 2, + asm: x86.AVPCOMPRESSB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQB256", argLen: 2, @@ -27605,6 +27891,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCOMPRESSBMasked512", + argLen: 2, + asm: x86.AVPCOMPRESSB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXSB512", argLen: 2, @@ -28577,20 +28877,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPERMPS512", - argLen: 2, - asm: x86.AVPERMPS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPERMD512", argLen: 2, @@ -28606,18 +28892,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2D512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPERMI2D, + name: "VPERMPS512", + argLen: 2, + asm: x86.AVPERMPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -28638,10 +28922,26 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2DMasked512", - argLen: 4, + name: "VPERMI2D512", + argLen: 3, resultInArg0: true, asm: x86.AVPERMI2D, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPERMI2PSMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPERMI2PS, reg: regInfo{ inputs: []inputInfo{ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -28655,10 +28955,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2PSMasked512", + name: "VPERMI2DMasked512", argLen: 4, resultInArg0: true, - asm: x86.AVPERMI2PS, + asm: x86.AVPERMI2D, reg: regInfo{ inputs: []inputInfo{ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29038,9 +29338,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMD256", + name: "VPERMPS256", argLen: 2, - asm: x86.AVPERMD, + asm: x86.AVPERMPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29052,9 +29352,9 @@ var 
opcodeTable = [...]opInfo{ }, }, { - name: "VPERMPS256", + name: "VPERMD256", argLen: 2, - asm: x86.AVPERMPS, + asm: x86.AVPERMD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29098,10 +29398,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2PSMasked256", + name: "VPERMI2DMasked256", argLen: 4, resultInArg0: true, - asm: x86.AVPERMI2PS, + asm: x86.AVPERMI2D, reg: regInfo{ inputs: []inputInfo{ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29115,10 +29415,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2DMasked256", + name: "VPERMI2PSMasked256", argLen: 4, resultInArg0: true, - asm: x86.AVPERMI2D, + asm: x86.AVPERMI2PS, reg: regInfo{ inputs: []inputInfo{ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29330,10 +29630,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2QMasked128", + name: "VPERMI2PDMasked128", argLen: 4, resultInArg0: true, - asm: x86.AVPERMI2Q, + asm: x86.AVPERMI2PD, reg: regInfo{ inputs: []inputInfo{ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29347,10 +29647,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2PDMasked128", + name: "VPERMI2QMasked128", argLen: 4, resultInArg0: true, - asm: x86.AVPERMI2PD, + asm: x86.AVPERMI2Q, reg: regInfo{ inputs: []inputInfo{ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29528,10 +29828,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2PD256", + name: "VPERMI2Q256", argLen: 3, resultInArg0: true, - asm: x86.AVPERMI2PD, + asm: x86.AVPERMI2Q, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29544,10 +29844,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMI2Q256", + name: "VPERMI2PD256", argLen: 3, resultInArg0: true, - asm: x86.AVPERMI2Q, + asm: x86.AVPERMI2PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29594,9 +29894,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMPDMasked256", + name: "VPERMQMasked256", argLen: 3, - asm: x86.AVPERMPD, + asm: x86.AVPERMQ, reg: regInfo{ inputs: []inputInfo{ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29609,9 +29909,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMQMasked256", + name: "VPERMPDMasked256", argLen: 3, - asm: x86.AVPERMQ, + asm: x86.AVPERMPD, reg: regInfo{ inputs: []inputInfo{ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29869,9 +30169,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMPDMasked512", + name: "VPERMQMasked512", argLen: 3, - asm: x86.AVPERMPD, + asm: x86.AVPERMQ, reg: regInfo{ inputs: []inputInfo{ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -29884,9 +30184,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPERMQMasked512", + name: "VPERMPDMasked512", argLen: 3, - asm: x86.AVPERMQ, + asm: x86.AVPERMPD, reg: regInfo{ inputs: []inputInfo{ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -60471,6 +60771,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CompressFloat32x16", + argLen: 2, + generic: true, + }, { name: "DivFloat32x16", argLen: 2, @@ -60695,6 +61000,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CompressFloat32x4", + argLen: 2, + generic: true, + }, { name: "DivFloat32x4", argLen: 2, @@ -60950,6 +61260,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CompressFloat32x8", + argLen: 2, + generic: true, + }, { name: "DivFloat32x8", argLen: 2, @@ -61205,6 +61520,11 
@@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CompressFloat64x2", + argLen: 2, + generic: true, + }, { name: "DivFloat64x2", argLen: 2, @@ -61460,6 +61780,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CompressFloat64x4", + argLen: 2, + generic: true, + }, { name: "DivFloat64x4", argLen: 2, @@ -61699,6 +62024,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CompressFloat64x8", + argLen: 2, + generic: true, + }, { name: "DivFloat64x8", argLen: 2, @@ -61914,6 +62244,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CompressInt16x16", + argLen: 2, + generic: true, + }, { name: "EqualInt16x16", argLen: 2, @@ -62197,6 +62532,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressInt16x32", + argLen: 2, + generic: true, + }, { name: "EqualInt16x32", argLen: 2, @@ -62454,6 +62794,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CompressInt16x8", + argLen: 2, + generic: true, + }, { name: "EqualInt16x8", argLen: 2, @@ -62759,6 +63104,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressInt32x16", + argLen: 2, + generic: true, + }, { name: "EqualInt32x16", argLen: 2, @@ -63067,6 +63417,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressInt32x4", + argLen: 2, + generic: true, + }, { name: "EqualInt32x4", argLen: 2, @@ -63396,6 +63751,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressInt32x8", + argLen: 2, + generic: true, + }, { name: "EqualInt32x8", argLen: 2, @@ -63725,6 +64085,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressInt64x2", + argLen: 2, + generic: true, + }, { name: "EqualInt64x2", argLen: 2, @@ -64005,6 +64370,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressInt64x4", + argLen: 2, + generic: true, + }, { name: "EqualInt64x4", argLen: 2, @@ -64285,6 +64655,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressInt64x8", + argLen: 2, + generic: true, + }, { name: "EqualInt64x8", argLen: 2, @@ -64554,6 +64929,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CompressInt8x16", + argLen: 2, + generic: true, + }, { name: "EqualInt8x16", argLen: 2, @@ -64734,6 +65114,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CompressInt8x32", + argLen: 2, + generic: true, + }, { name: "EqualInt8x32", argLen: 2, @@ -64903,6 +65288,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressInt8x64", + argLen: 2, + generic: true, + }, { name: "EqualInt8x64", argLen: 2, @@ -65068,6 +65458,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressUint16x16", + argLen: 2, + generic: true, + }, { name: "EqualUint16x16", argLen: 2, @@ -65204,19 +65599,14 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "Permute2MaskedUint16x16", - argLen: 4, - generic: true, - }, { name: "Permute2MaskedInt16x16", argLen: 4, generic: true, }, { - name: "PermuteMaskedUint16x16", - argLen: 3, + name: "Permute2MaskedUint16x16", + argLen: 4, generic: true, }, { @@ -65224,6 +65614,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "PermuteMaskedUint16x16", + argLen: 3, + generic: true, + }, { name: "PopCountUint16x16", argLen: 1, @@ -65356,6 +65751,11 
@@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressUint16x32", + argLen: 2, + generic: true, + }, { name: "EqualUint16x32", argLen: 2, @@ -65457,12 +65857,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteUint16x32", + name: "PermuteInt16x32", argLen: 2, generic: true, }, { - name: "PermuteInt16x32", + name: "PermuteUint16x32", argLen: 2, generic: true, }, @@ -65477,12 +65877,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2MaskedUint16x32", + name: "Permute2MaskedInt16x32", argLen: 4, generic: true, }, { - name: "Permute2MaskedInt16x32", + name: "Permute2MaskedUint16x32", argLen: 4, generic: true, }, @@ -65633,6 +66033,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressUint16x8", + argLen: 2, + generic: true, + }, { name: "EqualUint16x8", argLen: 2, @@ -65931,6 +66336,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressUint32x16", + argLen: 2, + generic: true, + }, { name: "EqualUint32x16", argLen: 2, @@ -66036,19 +66446,14 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "PermuteUint32x16", - argLen: 2, - generic: true, - }, { name: "PermuteFloat32x16", argLen: 2, generic: true, }, { - name: "Permute2Int32x16", - argLen: 3, + name: "PermuteUint32x16", + argLen: 2, generic: true, }, { @@ -66061,6 +66466,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "Permute2Int32x16", + argLen: 3, + generic: true, + }, { name: "Permute2MaskedUint32x16", argLen: 4, @@ -66076,6 +66486,11 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "PermuteMaskedFloat32x16", + argLen: 3, + generic: true, + }, { name: "PermuteMaskedUint32x16", argLen: 3, @@ -66086,11 +66501,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "PermuteMaskedFloat32x16", - argLen: 3, - generic: true, - }, { name: "PopCountUint32x16", argLen: 1, @@ -66257,6 +66667,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressUint32x4", + argLen: 2, + generic: true, + }, { name: "EqualUint32x4", argLen: 2, @@ -66378,18 +66793,18 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "Permute2Float32x4", - argLen: 3, - generic: true, - }, { name: "Permute2Int32x4", argLen: 3, generic: true, }, { - name: "Permute2MaskedUint32x4", + name: "Permute2Float32x4", + argLen: 3, + generic: true, + }, + { + name: "Permute2MaskedFloat32x4", argLen: 4, generic: true, }, @@ -66399,7 +66814,7 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2MaskedFloat32x4", + name: "Permute2MaskedUint32x4", argLen: 4, generic: true, }, @@ -66569,6 +66984,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressUint32x8", + argLen: 2, + generic: true, + }, { name: "EqualUint32x8", argLen: 2, @@ -66685,6 +67105,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "PermuteUint32x8", + argLen: 2, + generic: true, + }, { name: "PermuteInt32x8", argLen: 2, @@ -66695,11 +67120,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "PermuteUint32x8", - argLen: 2, - generic: true, - }, { name: "Permute2Uint32x8", argLen: 3, @@ -66721,12 +67141,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2MaskedUint32x8", + name: "Permute2MaskedInt32x8", argLen: 4, generic: true, }, { - name: "Permute2MaskedInt32x8", + name: "Permute2MaskedUint32x8", argLen: 
4, generic: true, }, @@ -66736,12 +67156,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteMaskedFloat32x8", + name: "PermuteMaskedUint32x8", argLen: 3, generic: true, }, { - name: "PermuteMaskedUint32x8", + name: "PermuteMaskedFloat32x8", argLen: 3, generic: true, }, @@ -66911,6 +67331,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressUint64x2", + argLen: 2, + generic: true, + }, { name: "EqualUint64x2", argLen: 2, @@ -67023,6 +67448,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "Permute2Float64x2", + argLen: 3, + generic: true, + }, { name: "Permute2Uint64x2", argLen: 3, @@ -67034,17 +67464,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2Float64x2", - argLen: 3, - generic: true, - }, - { - name: "Permute2MaskedUint64x2", + name: "Permute2MaskedInt64x2", argLen: 4, generic: true, }, { - name: "Permute2MaskedInt64x2", + name: "Permute2MaskedUint64x2", argLen: 4, generic: true, }, @@ -67199,6 +67624,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressUint64x4", + argLen: 2, + generic: true, + }, { name: "EqualUint64x4", argLen: 2, @@ -67311,6 +67741,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "PermuteFloat64x4", + argLen: 2, + generic: true, + }, { name: "PermuteUint64x4", argLen: 2, @@ -67322,17 +67757,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "Permute2Uint64x4", + name: "Permute2Int64x4", argLen: 3, generic: true, }, { - name: "Permute2Int64x4", + name: "Permute2Uint64x4", argLen: 3, generic: true, }, @@ -67342,7 +67772,7 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2MaskedInt64x4", + name: "Permute2MaskedFloat64x4", argLen: 4, generic: true, }, @@ -67352,7 +67782,7 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2MaskedFloat64x4", + name: "Permute2MaskedInt64x4", argLen: 4, generic: true, }, @@ -67362,12 +67792,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteMaskedInt64x4", + name: "PermuteMaskedUint64x4", argLen: 3, generic: true, }, { - name: "PermuteMaskedUint64x4", + name: "PermuteMaskedInt64x4", argLen: 3, generic: true, }, @@ -67517,6 +67947,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "CompressUint64x8", + argLen: 2, + generic: true, + }, { name: "EqualUint64x8", argLen: 2, @@ -67630,12 +68065,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteUint64x8", + name: "PermuteInt64x8", argLen: 2, generic: true, }, { - name: "PermuteInt64x8", + name: "PermuteUint64x8", argLen: 2, generic: true, }, @@ -67644,11 +68079,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "Permute2Int64x8", - argLen: 3, - generic: true, - }, { name: "Permute2Uint64x8", argLen: 3, @@ -67660,12 +68090,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2MaskedUint64x8", - argLen: 4, + name: "Permute2Int64x8", + argLen: 3, generic: true, }, { - name: "Permute2MaskedInt64x8", + name: "Permute2MaskedUint64x8", argLen: 4, generic: true, }, @@ -67675,7 +68105,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteMaskedFloat64x8", + name: "Permute2MaskedInt64x8", + argLen: 4, + generic: true, + }, + { + name: "PermuteMaskedUint64x8", argLen: 3, generic: true, }, @@ -67685,7 +68120,7 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - 
name: "PermuteMaskedUint64x8", + name: "PermuteMaskedFloat64x8", argLen: 3, generic: true, }, @@ -67836,6 +68271,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressUint8x16", + argLen: 2, + generic: true, + }, { name: "EqualUint8x16", argLen: 2, @@ -67951,12 +68391,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Permute2Uint8x16", + name: "Permute2Int8x16", argLen: 3, generic: true, }, { - name: "Permute2Int8x16", + name: "Permute2Uint8x16", argLen: 3, generic: true, }, @@ -67971,12 +68411,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteMaskedInt8x16", + name: "PermuteMaskedUint8x16", argLen: 3, generic: true, }, { - name: "PermuteMaskedUint8x16", + name: "PermuteMaskedInt8x16", argLen: 3, generic: true, }, @@ -68073,6 +68513,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressUint8x32", + argLen: 2, + generic: true, + }, { name: "EqualUint8x32", argLen: 2, @@ -68197,19 +68642,14 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "Permute2MaskedUint8x32", - argLen: 4, - generic: true, - }, { name: "Permute2MaskedInt8x32", argLen: 4, generic: true, }, { - name: "PermuteMaskedUint8x32", - argLen: 3, + name: "Permute2MaskedUint8x32", + argLen: 4, generic: true, }, { @@ -68217,6 +68657,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "PermuteMaskedUint8x32", + argLen: 3, + generic: true, + }, { name: "PopCountUint8x32", argLen: 1, @@ -68299,6 +68744,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "CompressUint8x64", + argLen: 2, + generic: true, + }, { name: "EqualUint8x64", argLen: 2, @@ -68397,19 +68847,14 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "PermuteUint8x64", - argLen: 2, - generic: true, - }, { name: "PermuteInt8x64", argLen: 2, generic: true, }, { - name: "Permute2Int8x64", - argLen: 3, + name: "PermuteUint8x64", + argLen: 2, generic: true, }, { @@ -68417,6 +68862,11 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "Permute2Int8x64", + argLen: 3, + generic: true, + }, { name: "Permute2MaskedUint8x64", argLen: 4, @@ -68428,12 +68878,12 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PermuteMaskedInt8x64", + name: "PermuteMaskedUint8x64", argLen: 3, generic: true, }, { - name: "PermuteMaskedUint8x64", + name: "PermuteMaskedInt8x64", argLen: 3, generic: true, }, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 1aa36bee042..53dffe10e4e 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1185,6 +1185,66 @@ func rewriteValueAMD64(v *Value) bool { case OpCom8: v.Op = OpAMD64NOTL return true + case OpCompressFloat32x16: + return rewriteValueAMD64_OpCompressFloat32x16(v) + case OpCompressFloat32x4: + return rewriteValueAMD64_OpCompressFloat32x4(v) + case OpCompressFloat32x8: + return rewriteValueAMD64_OpCompressFloat32x8(v) + case OpCompressFloat64x2: + return rewriteValueAMD64_OpCompressFloat64x2(v) + case OpCompressFloat64x4: + return rewriteValueAMD64_OpCompressFloat64x4(v) + case OpCompressFloat64x8: + return rewriteValueAMD64_OpCompressFloat64x8(v) + case OpCompressInt16x16: + return rewriteValueAMD64_OpCompressInt16x16(v) + case OpCompressInt16x32: + return rewriteValueAMD64_OpCompressInt16x32(v) + case OpCompressInt16x8: + return 
rewriteValueAMD64_OpCompressInt16x8(v) + case OpCompressInt32x16: + return rewriteValueAMD64_OpCompressInt32x16(v) + case OpCompressInt32x4: + return rewriteValueAMD64_OpCompressInt32x4(v) + case OpCompressInt32x8: + return rewriteValueAMD64_OpCompressInt32x8(v) + case OpCompressInt64x2: + return rewriteValueAMD64_OpCompressInt64x2(v) + case OpCompressInt64x4: + return rewriteValueAMD64_OpCompressInt64x4(v) + case OpCompressInt64x8: + return rewriteValueAMD64_OpCompressInt64x8(v) + case OpCompressInt8x16: + return rewriteValueAMD64_OpCompressInt8x16(v) + case OpCompressInt8x32: + return rewriteValueAMD64_OpCompressInt8x32(v) + case OpCompressInt8x64: + return rewriteValueAMD64_OpCompressInt8x64(v) + case OpCompressUint16x16: + return rewriteValueAMD64_OpCompressUint16x16(v) + case OpCompressUint16x32: + return rewriteValueAMD64_OpCompressUint16x32(v) + case OpCompressUint16x8: + return rewriteValueAMD64_OpCompressUint16x8(v) + case OpCompressUint32x16: + return rewriteValueAMD64_OpCompressUint32x16(v) + case OpCompressUint32x4: + return rewriteValueAMD64_OpCompressUint32x4(v) + case OpCompressUint32x8: + return rewriteValueAMD64_OpCompressUint32x8(v) + case OpCompressUint64x2: + return rewriteValueAMD64_OpCompressUint64x2(v) + case OpCompressUint64x4: + return rewriteValueAMD64_OpCompressUint64x4(v) + case OpCompressUint64x8: + return rewriteValueAMD64_OpCompressUint64x8(v) + case OpCompressUint8x16: + return rewriteValueAMD64_OpCompressUint8x16(v) + case OpCompressUint8x32: + return rewriteValueAMD64_OpCompressUint8x32(v) + case OpCompressUint8x64: + return rewriteValueAMD64_OpCompressUint8x64(v) case OpCondSelect: return rewriteValueAMD64_OpCondSelect(v) case OpConst16: @@ -30451,6 +30511,486 @@ func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressFloat32x16 x mask) + // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VCOMPRESSPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressFloat32x4 x mask) + // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VCOMPRESSPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressFloat32x8 x mask) + // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VCOMPRESSPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressFloat64x2 x mask) + // result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VCOMPRESSPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := 
v.Args[0] + b := v.Block + // match: (CompressFloat64x4 x mask) + // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VCOMPRESSPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressFloat64x8 x mask) + // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VCOMPRESSPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt16x16 x mask) + // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt16x32 x mask) + // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt16x8 x mask) + // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt32x16 x mask) + // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt32x4 x mask) + // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt32x8 x mask) + // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt64x2 x mask) + // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) 
+ v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt64x4 x mask) + // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt64x8 x mask) + // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt8x16 x mask) + // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt8x32 x mask) + // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt8x64 x mask) + // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint16x16 x mask) + // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint16x32 x mask) + // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint16x8 x mask) + // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint32x16 x mask) + // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + mask := 
v_1
+		v.reset(OpAMD64VPCOMPRESSDMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint32x4 x mask)
+	// result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSDMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint32x8 x mask)
+	// result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSDMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint64x2 x mask)
+	// result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSQMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint64x4 x mask)
+	// result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSQMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint64x8 x mask)
+	// result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSQMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint8x16 x mask)
+	// result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSBMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint8x32 x mask)
+	// result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSBMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (CompressUint8x64 x mask)
+	// result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPCOMPRESSBMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
 func rewriteValueAMD64_OpCondSelect(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
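Every rewrite function above follows the same two-step pattern: the generic op's vector mask is first converted to a hardware mask-register value (the VPMOVVec*ToM ops), and the compress is then re-emitted as the corresponding masked machine op. As a reference for what the lowered instruction computes, here is an illustrative scalar model in Go — not code from this CL, and it assumes the unselected result elements come back zeroed, as under zeroing-masking:

	// compressRef models the lane movement performed by the
	// VCOMPRESS*/VPCOMPRESS* family: lanes of x selected by mask
	// are packed, in their original order, into the lowest indices
	// of the result.
	func compressRef(x []float32, mask []bool) []float32 {
		out := make([]float32, len(x)) // assumed-zeroed destination
		n := 0
		for i, keep := range mask {
			if keep {
				out[n] = x[i]
				n++
			}
		}
		return out
	}
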
a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 3805ca35a87..1ef4369fa27 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -215,6 +215,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Float64x2.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
 	addF(simdPackage, "Float64x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
 	addF(simdPackage, "Float64x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.Compress", opLen2(ssa.OpCompressFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.Compress", opLen2(ssa.OpCompressFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.Compress", opLen2(ssa.OpCompressFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.Compress", opLen2(ssa.OpCompressInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.Compress", opLen2(ssa.OpCompressInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.Compress", opLen2(ssa.OpCompressInt8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int16x8.Compress", opLen2(ssa.OpCompressInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.Compress", opLen2(ssa.OpCompressInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.Compress", opLen2(ssa.OpCompressInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.Compress", opLen2(ssa.OpCompressInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.Compress", opLen2(ssa.OpCompressInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.Compress", opLen2(ssa.OpCompressInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.Compress", opLen2(ssa.OpCompressInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.Compress", opLen2(ssa.OpCompressInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.Compress", opLen2(ssa.OpCompressInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.Compress", opLen2(ssa.OpCompressUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.Compress", opLen2(ssa.OpCompressUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.Compress", opLen2(ssa.OpCompressUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.Compress", opLen2(ssa.OpCompressUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.Compress", opLen2(ssa.OpCompressUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.Compress", opLen2(ssa.OpCompressUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.Compress", opLen2(ssa.OpCompressUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.Compress", opLen2(ssa.OpCompressUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.Compress", opLen2(ssa.OpCompressUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
 	addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
 	addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
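These registrations are what turn a call to a vector type's Compress method into a single SSA op (and ultimately one instruction) rather than an ordinary function call. As a rough sketch of how the resulting API reads in user code — the LoadFloat64x4/Store helpers and the mask-producing Greater comparison are assumptions about the surrounding simd package, not something this CL adds:

	// packPositive keeps only the lanes of vals that are greater than
	// zero, packed to the front of the result vector.
	func packPositive(vals *[4]float64) [4]float64 {
		x := simd.LoadFloat64x4(vals)
		var zero [4]float64
		mask := x.Greater(simd.LoadFloat64x4(&zero)) // set where vals[i] > 0
		var out [4]float64
		x.Compress(mask).Store(&out)
		return out
	}
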
"Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index ebb626358f8..7121a6d208f 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -1084,6 +1084,188 @@ func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 +/* Compress */ + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VCOMPRESSPS, CPU Feature: AVX512F +func (x Float32x4) Compress(mask Mask32x4) Float32x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VCOMPRESSPS, CPU Feature: AVX512F +func (x Float32x8) Compress(mask Mask32x8) Float32x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VCOMPRESSPS, CPU Feature: AVX512F +func (x Float32x16) Compress(mask Mask32x16) Float32x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VCOMPRESSPD, CPU Feature: AVX512F +func (x Float64x2) Compress(mask Mask64x2) Float64x2 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VCOMPRESSPD, CPU Feature: AVX512F +func (x Float64x4) Compress(mask Mask64x4) Float64x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VCOMPRESSPD, CPU Feature: AVX512F +func (x Float64x8) Compress(mask Mask64x8) Float64x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Int8x16) Compress(mask Mask8x16) Int8x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Int8x32) Compress(mask Mask8x32) Int8x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Int8x64) Compress(mask Mask8x64) Int8x64 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. 
+// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Int16x8) Compress(mask Mask16x8) Int16x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Int16x16) Compress(mask Mask16x16) Int16x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Int16x32) Compress(mask Mask16x32) Int16x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512F +func (x Int32x4) Compress(mask Mask32x4) Int32x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512F +func (x Int32x8) Compress(mask Mask32x8) Int32x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512F +func (x Int32x16) Compress(mask Mask32x16) Int32x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512F +func (x Int64x2) Compress(mask Mask64x2) Int64x2 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512F +func (x Int64x4) Compress(mask Mask64x4) Int64x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512F +func (x Int64x8) Compress(mask Mask64x8) Int64x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Uint8x16) Compress(mask Mask8x16) Uint8x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Uint8x32) Compress(mask Mask8x32) Uint8x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Uint8x64) Compress(mask Mask8x64) Uint8x64 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) Compress(mask Mask16x8) Uint16x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) Compress(mask Mask16x16) Uint16x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. 
+// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) Compress(mask Mask16x32) Uint16x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512F +func (x Uint32x4) Compress(mask Mask32x4) Uint32x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512F +func (x Uint32x8) Compress(mask Mask32x8) Uint32x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512F +func (x Uint32x16) Compress(mask Mask32x16) Uint32x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512F +func (x Uint64x2) Compress(mask Mask64x2) Uint64x2 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512F +func (x Uint64x4) Compress(mask Mask64x4) Uint64x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask, and pack them to lower indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512F +func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 + /* DiffWithCeilWithPrecision */ // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index f1a2f11738c..d7010de10a9 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -186,6 +186,16 @@ func TestPermute2(t *testing.T) { } } +func TestCompress(t *testing.T) { + if !simd.HasAVX512() { + t.Skip("Test requires HasAVX512, not available on this hardware") + return + } + testInt32x4Mask32x4Int32x4(t, []int32{1, 2, 3, 4}, + []int32{0, -1, 0, -1}, + []int32{2, 4, 0, 0}, "Compress") +} + // checkInt8Slices ensures that b and a are equal, to the end of b. // also serves to use the slices, to prevent accidental optimization. 
func checkInt8Slices(t *testing.T, a, b []int8) { diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go index 29452bdad0e..8f0fb665be6 100644 --- a/src/simd/simd_wrapped_test.go +++ b/src/simd/simd_wrapped_test.go @@ -117,6 +117,27 @@ func testFloat32x4Compare(t *testing.T, v0 []float32, v1 []float32, want []int32 } } +func testFloat32x4Mask32x4Float32x4(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x4 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x4()) + + default: + t.Errorf("Unknown method: Float32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testFloat32x4MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x4 @@ -369,6 +390,27 @@ func testFloat32x8Compare(t *testing.T, v0 []float32, v1 []float32, want []int32 } } +func testFloat32x8Mask32x8Float32x8(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x8 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x8()) + + default: + t.Errorf("Unknown method: Float32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testFloat32x8MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x8 @@ -613,6 +655,27 @@ func testFloat32x16Compare(t *testing.T, v0 []float32, v1 []float32, want []int3 } } +func testFloat32x16Mask32x16Float32x16(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x16 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x16()) + + default: + t.Errorf("Unknown method: Float32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testFloat32x16MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x16 @@ -857,6 +920,27 @@ func testFloat64x2Compare(t *testing.T, v0 []float64, v1 []float64, want []int64 } } +func testFloat64x2Mask64x2Float64x2(t *testing.T, v0 []float64, v1 []int64, want []float64, which string) { + t.Helper() + var gotv simd.Float64x2 + got := make([]float64, len(want)) + vec0 := simd.LoadFloat64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x2()) + + default: + t.Errorf("Unknown method: Float64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testFloat64x2MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x2 @@ -1107,6 +1191,27 
@@ func testFloat64x4Compare(t *testing.T, v0 []float64, v1 []float64, want []int64 } } +func testFloat64x4Mask64x4Float64x4(t *testing.T, v0 []float64, v1 []int64, want []float64, which string) { + t.Helper() + var gotv simd.Float64x4 + got := make([]float64, len(want)) + vec0 := simd.LoadFloat64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x4()) + + default: + t.Errorf("Unknown method: Float64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testFloat64x4MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x4 @@ -1351,6 +1456,27 @@ func testFloat64x8Compare(t *testing.T, v0 []float64, v1 []float64, want []int64 } } +func testFloat64x8Mask64x8Float64x8(t *testing.T, v0 []float64, v1 []int64, want []float64, which string) { + t.Helper() + var gotv simd.Float64x8 + got := make([]float64, len(want)) + vec0 := simd.LoadFloat64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x8()) + + default: + t.Errorf("Unknown method: Float64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testFloat64x8MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x8 @@ -1591,6 +1717,27 @@ func testInt8x16Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which s } } +func testInt8x16Mask8x16Int8x16(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { + t.Helper() + var gotv simd.Int8x16 + got := make([]int8, len(want)) + vec0 := simd.LoadInt8x16Slice(v0) + vec1 := simd.LoadInt8x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask8x16()) + + default: + t.Errorf("Unknown method: Int8x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt8x16MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { t.Helper() var gotv simd.Int8x16 @@ -1772,6 +1919,27 @@ func testInt8x32Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which s } } +func testInt8x32Mask8x32Int8x32(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { + t.Helper() + var gotv simd.Int8x32 + got := make([]int8, len(want)) + vec0 := simd.LoadInt8x32Slice(v0) + vec1 := simd.LoadInt8x32Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask8x32()) + + default: + t.Errorf("Unknown method: Int8x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt8x32MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { t.Helper() var gotv simd.Int8x32 @@ -1943,6 +2111,27 @@ func testInt8x64Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which s } } +func testInt8x64Mask8x64Int8x64(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { + t.Helper() + var gotv simd.Int8x64 + got := make([]int8, len(want)) + vec0 := simd.LoadInt8x64Slice(v0) + vec1 := 
simd.LoadInt8x64Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask8x64()) + + default: + t.Errorf("Unknown method: Int8x64.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt8x64MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { t.Helper() var gotv simd.Int8x64 @@ -2191,6 +2380,27 @@ func testInt16x8Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, whic } } +func testInt16x8Mask16x8Int16x8(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x8 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask16x8()) + + default: + t.Errorf("Unknown method: Int16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x8 @@ -2488,6 +2698,27 @@ func testInt16x16Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, whi } } +func testInt16x16Mask16x16Int16x16(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask16x16()) + + default: + t.Errorf("Unknown method: Int16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x16MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x16 @@ -2767,6 +2998,27 @@ func testInt16x32Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, whi } } +func testInt16x32Mask16x32Int16x32(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask16x32()) + + default: + t.Errorf("Unknown method: Int16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x32MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x32 @@ -3091,6 +3343,27 @@ func testInt32x4Int16x8Int16x8Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int } } +func testInt32x4Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x4()) + + default: + t.Errorf("Unknown method: Int32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} 
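[Editor's aside, not part of the generated CL: testInt32x4Mask32x4Int32x4 above is the helper that TestCompress drives. As a minimal sketch of how the new Compress API reads in ordinary code, assuming a toolchain built from this branch (GOEXPERIMENT=simd, with the package importable as "simd") and AVX-512 hardware; the values simply mirror TestCompress.]

package main

import (
	"fmt"
	"simd"
)

func main() {
	if !simd.HasAVX512() {
		fmt.Println("AVX-512 not available; Compress is not usable here")
		return
	}
	x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	// A lane is selected when its mask lane is all ones (-1 as int32).
	m := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1}).AsMask32x4()
	got := make([]int32, 4)
	x.Compress(m).StoreSlice(got)
	fmt.Println(got) // [2 4 0 0]: selected lanes packed low, remaining lanes zeroed
}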
+ func testInt32x4MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x4 @@ -3464,6 +3737,27 @@ func testInt32x8Int16x16Int16x16Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []i } } +func testInt32x8Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x8()) + + default: + t.Errorf("Unknown method: Int32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt32x8MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x8 @@ -3810,6 +4104,27 @@ func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1 } } +func testInt32x16Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x16 @@ -4111,6 +4426,27 @@ func testInt64x2Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic } } +func testInt64x2Mask64x2Int64x2(t *testing.T, v0 []int64, v1 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x2 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x2()) + + default: + t.Errorf("Unknown method: Int64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x2MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x2 @@ -4363,6 +4699,27 @@ func testInt64x4Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic } } +func testInt64x4Mask64x4Int64x4(t *testing.T, v0 []int64, v1 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x4 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x4()) + + default: + t.Errorf("Unknown method: Int64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x4MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x4 @@ -4615,6 +4972,27 @@ func testInt64x8Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic } } +func testInt64x8Mask64x8Int64x8(t *testing.T, v0 []int64, v1 []int64, want []int64, 
which string) { + t.Helper() + var gotv simd.Int64x8 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x8()) + + default: + t.Errorf("Unknown method: Int64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x8MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x8 @@ -4894,6 +5272,27 @@ func testUint8x16Int8x16Mask16x8Int16x8(t *testing.T, v0 []uint8, v1 []int8, v2 } } +func testUint8x16Mask8x16Uint8x16(t *testing.T, v0 []uint8, v1 []int8, want []uint8, which string) { + t.Helper() + var gotv simd.Uint8x16 + got := make([]uint8, len(want)) + vec0 := simd.LoadUint8x16Slice(v0) + vec1 := simd.LoadInt8x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask8x16()) + + default: + t.Errorf("Unknown method: Uint8x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint8x16MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, want []int8, which string) { t.Helper() var gotv simd.Int8x16 @@ -5120,6 +5519,27 @@ func testUint8x32Int8x32Mask16x16Int16x16(t *testing.T, v0 []uint8, v1 []int8, v } } +func testUint8x32Mask8x32Uint8x32(t *testing.T, v0 []uint8, v1 []int8, want []uint8, which string) { + t.Helper() + var gotv simd.Uint8x32 + got := make([]uint8, len(want)) + vec0 := simd.LoadUint8x32Slice(v0) + vec1 := simd.LoadInt8x32Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask8x32()) + + default: + t.Errorf("Unknown method: Uint8x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint8x32MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, want []int8, which string) { t.Helper() var gotv simd.Int8x32 @@ -5338,6 +5758,27 @@ func testUint8x64Int8x64Mask16x32Int16x32(t *testing.T, v0 []uint8, v1 []int8, v } } +func testUint8x64Mask8x64Uint8x64(t *testing.T, v0 []uint8, v1 []int8, want []uint8, which string) { + t.Helper() + var gotv simd.Uint8x64 + got := make([]uint8, len(want)) + vec0 := simd.LoadUint8x64Slice(v0) + vec1 := simd.LoadInt8x64Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask8x64()) + + default: + t.Errorf("Unknown method: Uint8x64.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint8x64MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, want []int8, which string) { t.Helper() var gotv simd.Int8x64 @@ -5533,6 +5974,27 @@ func testUint16x8Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, w } } +func testUint16x8Mask16x8Uint16x8(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask16x8()) + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for 
i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x8 @@ -5777,6 +6239,27 @@ func testUint16x16Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, } } +func testUint16x16Mask16x16Uint16x16(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask16x16()) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x16 @@ -6009,6 +6492,27 @@ func testUint16x32Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, } } +func testUint16x32Mask16x32Uint16x32(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask16x32()) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x32 @@ -6274,6 +6778,27 @@ func testUint32x4Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, w } } +func testUint32x4Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x4()) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x4 @@ -6588,6 +7113,27 @@ func testUint32x8Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, w } } +func testUint32x8Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x8()) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x8 @@ 
-6877,6 +7423,27 @@ func testUint32x16Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, } } +func testUint32x16Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x16 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask32x16()) + + default: + t.Errorf("Unknown method: Uint32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x16 @@ -7170,6 +7737,27 @@ func testUint64x2Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, w } } +func testUint64x2Mask64x2Uint64x2(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x2 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x2()) + + default: + t.Errorf("Unknown method: Uint64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x2 @@ -7414,6 +8002,27 @@ func testUint64x4Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, w } } +func testUint64x4Mask64x4Uint64x4(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x4 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x4()) + + default: + t.Errorf("Unknown method: Uint64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x4 @@ -7658,6 +8267,27 @@ func testUint64x8Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, w } } +func testUint64x8Mask64x8Uint64x8(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x8 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + switch which { + case "Compress": + gotv = vec0.Compress(vec1.AsMask64x8()) + + default: + t.Errorf("Unknown method: Uint64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x8