[dev.simd] simd: add emulations for missing AVX2 comparisons

This also removes the AVX512 versions of the operations that
would use the same names but would not run on AVX2-only machines.

includes files generated by simdgen CL 692355

Change-Id: Iff29042245b7688133fed49a03e681e85235b8a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/692335
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
David Chase 2025-08-01 09:23:45 -04:00
parent ddb689c7bb
commit 2080415aa2
11 changed files with 855 additions and 2827 deletions

View file

@ -886,29 +886,13 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
case ssa.OpAMD64VCMPPS512, case ssa.OpAMD64VCMPPS512,
ssa.OpAMD64VCMPPD512, ssa.OpAMD64VCMPPD512,
ssa.OpAMD64VPCMPUB128,
ssa.OpAMD64VPCMPUB256,
ssa.OpAMD64VPCMPUB512, ssa.OpAMD64VPCMPUB512,
ssa.OpAMD64VPCMPUW128,
ssa.OpAMD64VPCMPUW256,
ssa.OpAMD64VPCMPUW512, ssa.OpAMD64VPCMPUW512,
ssa.OpAMD64VPCMPUD128,
ssa.OpAMD64VPCMPUD256,
ssa.OpAMD64VPCMPUD512, ssa.OpAMD64VPCMPUD512,
ssa.OpAMD64VPCMPUQ128,
ssa.OpAMD64VPCMPUQ256,
ssa.OpAMD64VPCMPUQ512, ssa.OpAMD64VPCMPUQ512,
ssa.OpAMD64VPCMPB128,
ssa.OpAMD64VPCMPB256,
ssa.OpAMD64VPCMPB512, ssa.OpAMD64VPCMPB512,
ssa.OpAMD64VPCMPW128,
ssa.OpAMD64VPCMPW256,
ssa.OpAMD64VPCMPW512, ssa.OpAMD64VPCMPW512,
ssa.OpAMD64VPCMPD128,
ssa.OpAMD64VPCMPD256,
ssa.OpAMD64VPCMPD512, ssa.OpAMD64VPCMPD512,
ssa.OpAMD64VPCMPQ128,
ssa.OpAMD64VPCMPQ256,
ssa.OpAMD64VPCMPQ512: ssa.OpAMD64VPCMPQ512:
p = simdV2kImm8(s, v) p = simdV2kImm8(s, v)

View file

@ -590,17 +590,9 @@
(GreaterInt64x2 ...) => (VPCMPGTQ128 ...) (GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
(GreaterInt64x4 ...) => (VPCMPGTQ256 ...) (GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y)) (GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
(GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y))
(GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [14] x y))
(GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) (GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
(GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [14] x y))
(GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [14] x y))
(GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) (GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [14] x y))
(GreaterUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [14] x y))
(GreaterUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [14] x y))
(GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) (GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [14] x y))
(GreaterUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y))
(GreaterUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y))
(GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) (GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y))
(GreaterEqualFloat32x4 x y) => (VCMPPS128 [13] x y) (GreaterEqualFloat32x4 x y) => (VCMPPS128 [13] x y)
(GreaterEqualFloat32x8 x y) => (VCMPPS256 [13] x y) (GreaterEqualFloat32x8 x y) => (VCMPPS256 [13] x y)
@ -608,29 +600,13 @@
(GreaterEqualFloat64x2 x y) => (VCMPPD128 [13] x y) (GreaterEqualFloat64x2 x y) => (VCMPPD128 [13] x y)
(GreaterEqualFloat64x4 x y) => (VCMPPD256 [13] x y) (GreaterEqualFloat64x4 x y) => (VCMPPD256 [13] x y)
(GreaterEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) (GreaterEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [13] x y))
(GreaterEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [13] x y))
(GreaterEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [13] x y))
(GreaterEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) (GreaterEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [13] x y))
(GreaterEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [13] x y))
(GreaterEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [13] x y))
(GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) (GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [13] x y))
(GreaterEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [13] x y))
(GreaterEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [13] x y))
(GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) (GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [13] x y))
(GreaterEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [13] x y))
(GreaterEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [13] x y))
(GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) (GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [13] x y))
(GreaterEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [13] x y))
(GreaterEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [13] x y))
(GreaterEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) (GreaterEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [13] x y))
(GreaterEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [13] x y))
(GreaterEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [13] x y))
(GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
(GreaterEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [13] x y))
(GreaterEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [13] x y))
(GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
(GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y))
(GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y))
(GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask))) (GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask))) (GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
@ -710,29 +686,13 @@
(LessFloat64x2 x y) => (VCMPPD128 [1] x y) (LessFloat64x2 x y) => (VCMPPD128 [1] x y)
(LessFloat64x4 x y) => (VCMPPD256 [1] x y) (LessFloat64x4 x y) => (VCMPPD256 [1] x y)
(LessFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) (LessFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [1] x y))
(LessInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [1] x y))
(LessInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [1] x y))
(LessInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) (LessInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [1] x y))
(LessInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [1] x y))
(LessInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [1] x y))
(LessInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) (LessInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [1] x y))
(LessInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [1] x y))
(LessInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [1] x y))
(LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) (LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y))
(LessInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [1] x y))
(LessInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [1] x y))
(LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) (LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y))
(LessUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [1] x y))
(LessUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [1] x y))
(LessUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) (LessUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [1] x y))
(LessUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [1] x y))
(LessUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [1] x y))
(LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) (LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y))
(LessUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [1] x y))
(LessUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [1] x y))
(LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) (LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y))
(LessUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y))
(LessUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y))
(LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) (LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y))
(LessEqualFloat32x4 x y) => (VCMPPS128 [2] x y) (LessEqualFloat32x4 x y) => (VCMPPS128 [2] x y)
(LessEqualFloat32x8 x y) => (VCMPPS256 [2] x y) (LessEqualFloat32x8 x y) => (VCMPPS256 [2] x y)
@ -740,29 +700,13 @@
(LessEqualFloat64x2 x y) => (VCMPPD128 [2] x y) (LessEqualFloat64x2 x y) => (VCMPPD128 [2] x y)
(LessEqualFloat64x4 x y) => (VCMPPD256 [2] x y) (LessEqualFloat64x4 x y) => (VCMPPD256 [2] x y)
(LessEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) (LessEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [2] x y))
(LessEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [2] x y))
(LessEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [2] x y))
(LessEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) (LessEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [2] x y))
(LessEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [2] x y))
(LessEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [2] x y))
(LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) (LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y))
(LessEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [2] x y))
(LessEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [2] x y))
(LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) (LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y))
(LessEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [2] x y))
(LessEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [2] x y))
(LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) (LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y))
(LessEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [2] x y))
(LessEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [2] x y))
(LessEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) (LessEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [2] x y))
(LessEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [2] x y))
(LessEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [2] x y))
(LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
(LessEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [2] x y))
(LessEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [2] x y))
(LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
(LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y))
(LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y))
(LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask))) (LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask))) (LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
@ -1050,29 +994,13 @@
(NotEqualFloat64x2 x y) => (VCMPPD128 [4] x y) (NotEqualFloat64x2 x y) => (VCMPPD128 [4] x y)
(NotEqualFloat64x4 x y) => (VCMPPD256 [4] x y) (NotEqualFloat64x4 x y) => (VCMPPD256 [4] x y)
(NotEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) (NotEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
(NotEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [4] x y))
(NotEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [4] x y))
(NotEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) (NotEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
(NotEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [4] x y))
(NotEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [4] x y))
(NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) (NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
(NotEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [4] x y))
(NotEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [4] x y))
(NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) (NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
(NotEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [4] x y))
(NotEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [4] x y))
(NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) (NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
(NotEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [4] x y))
(NotEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [4] x y))
(NotEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) (NotEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
(NotEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [4] x y))
(NotEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [4] x y))
(NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
(NotEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [4] x y))
(NotEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [4] x y))
(NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
(NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y))
(NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y))
(NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask))) (NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask))) (NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))

View file

@ -986,29 +986,13 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPB128", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPB256", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPW128", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPW256", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPD128", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPD256", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ128", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ256", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPROLD128", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLD128", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPROLD256", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPROLD256", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},

View file

@ -514,17 +514,9 @@ func simdGenericOps() []opData {
{name: "GreaterEqualFloat64x2", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x2", argLength: 2, commutative: false},
{name: "GreaterEqualFloat64x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x4", argLength: 2, commutative: false},
{name: "GreaterEqualFloat64x8", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x8", argLength: 2, commutative: false},
{name: "GreaterEqualInt8x16", argLength: 2, commutative: false},
{name: "GreaterEqualInt8x32", argLength: 2, commutative: false},
{name: "GreaterEqualInt8x64", argLength: 2, commutative: false}, {name: "GreaterEqualInt8x64", argLength: 2, commutative: false},
{name: "GreaterEqualInt16x8", argLength: 2, commutative: false},
{name: "GreaterEqualInt16x16", argLength: 2, commutative: false},
{name: "GreaterEqualInt16x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt16x32", argLength: 2, commutative: false},
{name: "GreaterEqualInt32x4", argLength: 2, commutative: false},
{name: "GreaterEqualInt32x8", argLength: 2, commutative: false},
{name: "GreaterEqualInt32x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x16", argLength: 2, commutative: false},
{name: "GreaterEqualInt64x2", argLength: 2, commutative: false},
{name: "GreaterEqualInt64x4", argLength: 2, commutative: false},
{name: "GreaterEqualInt64x8", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x8", argLength: 2, commutative: false},
{name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false},
{name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false},
@ -556,17 +548,9 @@ func simdGenericOps() []opData {
{name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false},
{name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false},
{name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false},
{name: "GreaterEqualUint8x16", argLength: 2, commutative: false},
{name: "GreaterEqualUint8x32", argLength: 2, commutative: false},
{name: "GreaterEqualUint8x64", argLength: 2, commutative: false}, {name: "GreaterEqualUint8x64", argLength: 2, commutative: false},
{name: "GreaterEqualUint16x8", argLength: 2, commutative: false},
{name: "GreaterEqualUint16x16", argLength: 2, commutative: false},
{name: "GreaterEqualUint16x32", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x32", argLength: 2, commutative: false},
{name: "GreaterEqualUint32x4", argLength: 2, commutative: false},
{name: "GreaterEqualUint32x8", argLength: 2, commutative: false},
{name: "GreaterEqualUint32x16", argLength: 2, commutative: false}, {name: "GreaterEqualUint32x16", argLength: 2, commutative: false},
{name: "GreaterEqualUint64x2", argLength: 2, commutative: false},
{name: "GreaterEqualUint64x4", argLength: 2, commutative: false},
{name: "GreaterEqualUint64x8", argLength: 2, commutative: false}, {name: "GreaterEqualUint64x8", argLength: 2, commutative: false},
{name: "GreaterFloat32x4", argLength: 2, commutative: false}, {name: "GreaterFloat32x4", argLength: 2, commutative: false},
{name: "GreaterFloat32x8", argLength: 2, commutative: false}, {name: "GreaterFloat32x8", argLength: 2, commutative: false},
@ -616,17 +600,9 @@ func simdGenericOps() []opData {
{name: "GreaterMaskedUint64x2", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false},
{name: "GreaterMaskedUint64x4", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false},
{name: "GreaterMaskedUint64x8", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false},
{name: "GreaterUint8x16", argLength: 2, commutative: false},
{name: "GreaterUint8x32", argLength: 2, commutative: false},
{name: "GreaterUint8x64", argLength: 2, commutative: false}, {name: "GreaterUint8x64", argLength: 2, commutative: false},
{name: "GreaterUint16x8", argLength: 2, commutative: false},
{name: "GreaterUint16x16", argLength: 2, commutative: false},
{name: "GreaterUint16x32", argLength: 2, commutative: false}, {name: "GreaterUint16x32", argLength: 2, commutative: false},
{name: "GreaterUint32x4", argLength: 2, commutative: false},
{name: "GreaterUint32x8", argLength: 2, commutative: false},
{name: "GreaterUint32x16", argLength: 2, commutative: false}, {name: "GreaterUint32x16", argLength: 2, commutative: false},
{name: "GreaterUint64x2", argLength: 2, commutative: false},
{name: "GreaterUint64x4", argLength: 2, commutative: false},
{name: "GreaterUint64x8", argLength: 2, commutative: false}, {name: "GreaterUint64x8", argLength: 2, commutative: false},
{name: "IsNanFloat32x4", argLength: 2, commutative: true}, {name: "IsNanFloat32x4", argLength: 2, commutative: true},
{name: "IsNanFloat32x8", argLength: 2, commutative: true}, {name: "IsNanFloat32x8", argLength: 2, commutative: true},
@ -646,17 +622,9 @@ func simdGenericOps() []opData {
{name: "LessEqualFloat64x2", argLength: 2, commutative: false}, {name: "LessEqualFloat64x2", argLength: 2, commutative: false},
{name: "LessEqualFloat64x4", argLength: 2, commutative: false}, {name: "LessEqualFloat64x4", argLength: 2, commutative: false},
{name: "LessEqualFloat64x8", argLength: 2, commutative: false}, {name: "LessEqualFloat64x8", argLength: 2, commutative: false},
{name: "LessEqualInt8x16", argLength: 2, commutative: false},
{name: "LessEqualInt8x32", argLength: 2, commutative: false},
{name: "LessEqualInt8x64", argLength: 2, commutative: false}, {name: "LessEqualInt8x64", argLength: 2, commutative: false},
{name: "LessEqualInt16x8", argLength: 2, commutative: false},
{name: "LessEqualInt16x16", argLength: 2, commutative: false},
{name: "LessEqualInt16x32", argLength: 2, commutative: false}, {name: "LessEqualInt16x32", argLength: 2, commutative: false},
{name: "LessEqualInt32x4", argLength: 2, commutative: false},
{name: "LessEqualInt32x8", argLength: 2, commutative: false},
{name: "LessEqualInt32x16", argLength: 2, commutative: false}, {name: "LessEqualInt32x16", argLength: 2, commutative: false},
{name: "LessEqualInt64x2", argLength: 2, commutative: false},
{name: "LessEqualInt64x4", argLength: 2, commutative: false},
{name: "LessEqualInt64x8", argLength: 2, commutative: false}, {name: "LessEqualInt64x8", argLength: 2, commutative: false},
{name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false}, {name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false},
{name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false}, {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false},
@ -688,17 +656,9 @@ func simdGenericOps() []opData {
{name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false},
{name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false},
{name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false},
{name: "LessEqualUint8x16", argLength: 2, commutative: false},
{name: "LessEqualUint8x32", argLength: 2, commutative: false},
{name: "LessEqualUint8x64", argLength: 2, commutative: false}, {name: "LessEqualUint8x64", argLength: 2, commutative: false},
{name: "LessEqualUint16x8", argLength: 2, commutative: false},
{name: "LessEqualUint16x16", argLength: 2, commutative: false},
{name: "LessEqualUint16x32", argLength: 2, commutative: false}, {name: "LessEqualUint16x32", argLength: 2, commutative: false},
{name: "LessEqualUint32x4", argLength: 2, commutative: false},
{name: "LessEqualUint32x8", argLength: 2, commutative: false},
{name: "LessEqualUint32x16", argLength: 2, commutative: false}, {name: "LessEqualUint32x16", argLength: 2, commutative: false},
{name: "LessEqualUint64x2", argLength: 2, commutative: false},
{name: "LessEqualUint64x4", argLength: 2, commutative: false},
{name: "LessEqualUint64x8", argLength: 2, commutative: false}, {name: "LessEqualUint64x8", argLength: 2, commutative: false},
{name: "LessFloat32x4", argLength: 2, commutative: false}, {name: "LessFloat32x4", argLength: 2, commutative: false},
{name: "LessFloat32x8", argLength: 2, commutative: false}, {name: "LessFloat32x8", argLength: 2, commutative: false},
@ -706,17 +666,9 @@ func simdGenericOps() []opData {
{name: "LessFloat64x2", argLength: 2, commutative: false}, {name: "LessFloat64x2", argLength: 2, commutative: false},
{name: "LessFloat64x4", argLength: 2, commutative: false}, {name: "LessFloat64x4", argLength: 2, commutative: false},
{name: "LessFloat64x8", argLength: 2, commutative: false}, {name: "LessFloat64x8", argLength: 2, commutative: false},
{name: "LessInt8x16", argLength: 2, commutative: false},
{name: "LessInt8x32", argLength: 2, commutative: false},
{name: "LessInt8x64", argLength: 2, commutative: false}, {name: "LessInt8x64", argLength: 2, commutative: false},
{name: "LessInt16x8", argLength: 2, commutative: false},
{name: "LessInt16x16", argLength: 2, commutative: false},
{name: "LessInt16x32", argLength: 2, commutative: false}, {name: "LessInt16x32", argLength: 2, commutative: false},
{name: "LessInt32x4", argLength: 2, commutative: false},
{name: "LessInt32x8", argLength: 2, commutative: false},
{name: "LessInt32x16", argLength: 2, commutative: false}, {name: "LessInt32x16", argLength: 2, commutative: false},
{name: "LessInt64x2", argLength: 2, commutative: false},
{name: "LessInt64x4", argLength: 2, commutative: false},
{name: "LessInt64x8", argLength: 2, commutative: false}, {name: "LessInt64x8", argLength: 2, commutative: false},
{name: "LessMaskedFloat32x4", argLength: 3, commutative: false}, {name: "LessMaskedFloat32x4", argLength: 3, commutative: false},
{name: "LessMaskedFloat32x8", argLength: 3, commutative: false}, {name: "LessMaskedFloat32x8", argLength: 3, commutative: false},
@ -748,17 +700,9 @@ func simdGenericOps() []opData {
{name: "LessMaskedUint64x2", argLength: 3, commutative: false}, {name: "LessMaskedUint64x2", argLength: 3, commutative: false},
{name: "LessMaskedUint64x4", argLength: 3, commutative: false}, {name: "LessMaskedUint64x4", argLength: 3, commutative: false},
{name: "LessMaskedUint64x8", argLength: 3, commutative: false}, {name: "LessMaskedUint64x8", argLength: 3, commutative: false},
{name: "LessUint8x16", argLength: 2, commutative: false},
{name: "LessUint8x32", argLength: 2, commutative: false},
{name: "LessUint8x64", argLength: 2, commutative: false}, {name: "LessUint8x64", argLength: 2, commutative: false},
{name: "LessUint16x8", argLength: 2, commutative: false},
{name: "LessUint16x16", argLength: 2, commutative: false},
{name: "LessUint16x32", argLength: 2, commutative: false}, {name: "LessUint16x32", argLength: 2, commutative: false},
{name: "LessUint32x4", argLength: 2, commutative: false},
{name: "LessUint32x8", argLength: 2, commutative: false},
{name: "LessUint32x16", argLength: 2, commutative: false}, {name: "LessUint32x16", argLength: 2, commutative: false},
{name: "LessUint64x2", argLength: 2, commutative: false},
{name: "LessUint64x4", argLength: 2, commutative: false},
{name: "LessUint64x8", argLength: 2, commutative: false}, {name: "LessUint64x8", argLength: 2, commutative: false},
{name: "MaxFloat32x4", argLength: 2, commutative: true}, {name: "MaxFloat32x4", argLength: 2, commutative: true},
{name: "MaxFloat32x8", argLength: 2, commutative: true}, {name: "MaxFloat32x8", argLength: 2, commutative: true},
@ -986,17 +930,9 @@ func simdGenericOps() []opData {
{name: "NotEqualFloat64x2", argLength: 2, commutative: true}, {name: "NotEqualFloat64x2", argLength: 2, commutative: true},
{name: "NotEqualFloat64x4", argLength: 2, commutative: true}, {name: "NotEqualFloat64x4", argLength: 2, commutative: true},
{name: "NotEqualFloat64x8", argLength: 2, commutative: true}, {name: "NotEqualFloat64x8", argLength: 2, commutative: true},
{name: "NotEqualInt8x16", argLength: 2, commutative: true},
{name: "NotEqualInt8x32", argLength: 2, commutative: true},
{name: "NotEqualInt8x64", argLength: 2, commutative: true}, {name: "NotEqualInt8x64", argLength: 2, commutative: true},
{name: "NotEqualInt16x8", argLength: 2, commutative: true},
{name: "NotEqualInt16x16", argLength: 2, commutative: true},
{name: "NotEqualInt16x32", argLength: 2, commutative: true}, {name: "NotEqualInt16x32", argLength: 2, commutative: true},
{name: "NotEqualInt32x4", argLength: 2, commutative: true},
{name: "NotEqualInt32x8", argLength: 2, commutative: true},
{name: "NotEqualInt32x16", argLength: 2, commutative: true}, {name: "NotEqualInt32x16", argLength: 2, commutative: true},
{name: "NotEqualInt64x2", argLength: 2, commutative: true},
{name: "NotEqualInt64x4", argLength: 2, commutative: true},
{name: "NotEqualInt64x8", argLength: 2, commutative: true}, {name: "NotEqualInt64x8", argLength: 2, commutative: true},
{name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true},
{name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true}, {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true},
@ -1028,17 +964,9 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
{name: "NotEqualUint8x16", argLength: 2, commutative: true},
{name: "NotEqualUint8x32", argLength: 2, commutative: true},
{name: "NotEqualUint8x64", argLength: 2, commutative: true}, {name: "NotEqualUint8x64", argLength: 2, commutative: true},
{name: "NotEqualUint16x8", argLength: 2, commutative: true},
{name: "NotEqualUint16x16", argLength: 2, commutative: true},
{name: "NotEqualUint16x32", argLength: 2, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true},
{name: "NotEqualUint32x4", argLength: 2, commutative: true},
{name: "NotEqualUint32x8", argLength: 2, commutative: true},
{name: "NotEqualUint32x16", argLength: 2, commutative: true}, {name: "NotEqualUint32x16", argLength: 2, commutative: true},
{name: "NotEqualUint64x2", argLength: 2, commutative: true},
{name: "NotEqualUint64x4", argLength: 2, commutative: true},
{name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "NotEqualUint64x8", argLength: 2, commutative: true},
{name: "OnesCountInt8x16", argLength: 1, commutative: false}, {name: "OnesCountInt8x16", argLength: 1, commutative: false},
{name: "OnesCountInt8x32", argLength: 1, commutative: false}, {name: "OnesCountInt8x32", argLength: 1, commutative: false},

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -602,17 +602,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Greater", opLen2(ssa.OpGreaterUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Greater", opLen2(ssa.OpGreaterUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Greater", opLen2(ssa.OpGreaterUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Greater", opLen2(ssa.OpGreaterUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Greater", opLen2(ssa.OpGreaterUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Greater", opLen2(ssa.OpGreaterUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64)
@ -620,29 +612,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
@ -722,29 +698,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Less", opLen2(ssa.OpLessInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Less", opLen2(ssa.OpLessInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Less", opLen2(ssa.OpLessInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Less", opLen2(ssa.OpLessInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Less", opLen2(ssa.OpLessInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Less", opLen2(ssa.OpLessInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Less", opLen2(ssa.OpLessInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Less", opLen2(ssa.OpLessInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Less", opLen2(ssa.OpLessUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Less", opLen2(ssa.OpLessUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Less", opLen2(ssa.OpLessUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Less", opLen2(ssa.OpLessUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Less", opLen2(ssa.OpLessUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Less", opLen2(ssa.OpLessUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Less", opLen2(ssa.OpLessUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Less", opLen2(ssa.OpLessUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64)
@ -752,29 +712,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.LessEqual", opLen2(ssa.OpLessEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.LessEqual", opLen2(ssa.OpLessEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.LessEqual", opLen2(ssa.OpLessEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.LessEqual", opLen2(ssa.OpLessEqualInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.LessEqual", opLen2(ssa.OpLessEqualInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.LessEqual", opLen2(ssa.OpLessEqualInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.LessEqual", opLen2(ssa.OpLessEqualInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.LessEqual", opLen2(ssa.OpLessEqualInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.LessEqual", opLen2(ssa.OpLessEqualUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.LessEqual", opLen2(ssa.OpLessEqualUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.LessEqual", opLen2(ssa.OpLessEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.LessEqual", opLen2(ssa.OpLessEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.LessEqual", opLen2(ssa.OpLessEqualUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.LessEqual", opLen2(ssa.OpLessEqualUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
@ -1062,29 +1006,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.NotEqual", opLen2(ssa.OpNotEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.NotEqual", opLen2(ssa.OpNotEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.NotEqual", opLen2(ssa.OpNotEqualInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.NotEqual", opLen2(ssa.OpNotEqualInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.NotEqual", opLen2(ssa.OpNotEqualInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.NotEqual", opLen2(ssa.OpNotEqualInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.NotEqual", opLen2(ssa.OpNotEqualUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.NotEqual", opLen2(ssa.OpNotEqualUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.NotEqual", opLen2(ssa.OpNotEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.NotEqual", opLen2(ssa.OpNotEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.NotEqual", opLen2(ssa.OpNotEqualUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.NotEqual", opLen2(ssa.OpNotEqualUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)

View file

@ -59,17 +59,32 @@ func TestLess(t *testing.T) {
testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64]) testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64])
testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64])
if comparisonFixed { testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16])
testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16])
testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32])
testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32])
testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64])
testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64])
testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8])
testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8])
testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8])
} testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16])
testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16])
testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32])
testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32])
testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64])
testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64])
testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8])
testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8])
testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16])
testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8])
if simd.HasAVX512() { if simd.HasAVX512() {
testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16])
@ -100,28 +115,25 @@ func TestLessEqual(t *testing.T) {
testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64]) testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64])
testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64])
if comparisonFixed { testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16])
testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16]) testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16])
testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16]) testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32])
testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32]) testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32])
testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32]) testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64])
testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64]) testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64])
testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64]) testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8])
testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8]) testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8])
testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8])
} testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16])
testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.LessEqual, lessEqualSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
if simd.HasAVX512() { if simd.HasAVX512() {
testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16])
testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.LessEqual, lessEqualSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32]) testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32])
testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64]) testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64])
testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8]) testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8])
@ -151,16 +163,17 @@ func TestGreater(t *testing.T) {
testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8]) testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8])
testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8]) testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8])
if simd.HasAVX512() { testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16]) testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16]) testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32]) testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64]) testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64]) testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8]) testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8]) testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
if simd.HasAVX512() {
testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32]) testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32])
testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64]) testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64])
@ -181,28 +194,25 @@ func TestGreaterEqual(t *testing.T) {
testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64]) testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
if comparisonFixed { testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16]) testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16]) testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32]) testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32]) testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64]) testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64]) testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8]) testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
} testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
if simd.HasAVX512() { if simd.HasAVX512() {
testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32]) testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64]) testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8]) testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
@ -260,25 +270,23 @@ func TestNotEqual(t *testing.T) {
testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64]) testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64])
testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64])
if comparisonFixed { testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16]) testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16]) testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32]) testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32])
testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32]) testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64])
testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64]) testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64])
testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64]) testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8])
testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8]) testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16]) testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16])
testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16]) testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16])
testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32]) testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32]) testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64]) testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64]) testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8]) testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8]) testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
}
if simd.HasAVX512() { if simd.HasAVX512() {
testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32]) testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32])

View file

@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
floats: []int{32}, floats: []int{32},
} }
// avx2SignedComparisons lists the shapes that avx2SignedComparisonsTemplate
// is instantiated for: vecs 128 and 256 (presumably vector widths in bits)
// crossed with ints 8, 16, 32 and 64 (presumably signed element widths).
var avx2SignedComparisons = &shapes{
vecs: []int{128, 256},
ints: []int{8, 16, 32, 64},
}
// avx2UnsignedComparisons lists the shapes that avx2UnsignedComparisonsTemplate
// is instantiated for: vecs 128 and 256 (presumably vector widths in bits)
// crossed with uints 8, 16, 32 and 64 (presumably unsigned element widths).
var avx2UnsignedComparisons = &shapes{
vecs: []int{128, 256},
uints: []int{8, 16, 32, 64},
}
type templateData struct { type templateData struct {
Vec string // the type of the vector, e.g. Float32x4 Vec string // the type of the vector, e.g. Float32x4
AOrAn string // for documentation, the article "a" or "an" AOrAn string // for documentation, the article "a" or "an"
@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
} }
`) `)
// CPUfeature returns the CPU feature name that templates print in the
// "CPU Feature" line of generated documentation, chosen by t.Vwidth:
// 128 gives "AVX", 256 gives "AVX2" and 512 gives "AVX512".
//
// Any other width is treated as a generator bug and panics.
func (t templateData) CPUfeature() string {
	width := t.Vwidth
	if width == 128 {
		return "AVX"
	}
	if width == 256 {
		return "AVX2"
	}
	if width == 512 {
		return "AVX512"
	}
	panic(fmt.Errorf("unexpected vector width %d", width))
}
// avx2SignedComparisonsTemplate is the template for the emulated signed
// comparisons, restricted to the shapes in avx2SignedComparisons. It emits
// Less, GreaterEqual, LessEqual and NotEqual methods built only from the
// vector type's Greater and Equal methods:
//
//   - Less swaps the operands of Greater.
//   - GreaterEqual, LessEqual and NotEqual compute the opposite comparison
//     and invert it by XOR with an all-ones vector taken from x.Equal(x).
//
// The documented CPU feature comes from templateData.CPUfeature.
var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
return y.Greater(x)
}
// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}()
return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}()
return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}()
return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
`)
// CPUfeatureAVX2if8 returns "AVX2" when the element width is 8 and
// otherwise defers to CPUfeature.
//
// It supplies the documented CPU feature for the emulated unsigned
// comparisons. Those build their sign mask with a shift for every
// element width above 8 (shifts are AVX), but must use a broadcast
// (AVX2) for bytes.
func (t templateData) CPUfeatureAVX2if8() string {
	if t.Width != 8 {
		return t.CPUfeature()
	}
	return "AVX2"
}
// avx2UnsignedComparisonsTemplate is the template for the emulated unsigned
// comparisons, restricted to the shapes in avx2UnsignedComparisons. It emits
// Greater, Less, GreaterEqual, LessEqual and NotEqual.
//
// The ordered comparisons reinterpret both operands as signed vectors, XOR
// each with a per-element sign-bit mask ("signs"), and then apply the signed
// Greater. The mask is built with BroadcastInt when the element width is 8,
// and otherwise by shifting an all-ones vector (from x.Equal(x)) left by
// Width-1. GreaterEqual and LessEqual invert the opposite strict comparison
// by XOR with all-ones. NotEqual inverts Equal the same way and needs no
// sign mask.
//
// The documented CPU feature comes from templateData.CPUfeatureAVX2if8 for
// the ordered comparisons and from templateData.CPUfeature for NotEqual.
var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
// Greater returns a mask whose elements indicate whether x > y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
ones := x.Equal(x).AsInt{{.WxC}}()
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return a.Xor(signs).Greater(b.Xor(signs))
}
// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
ones := x.Equal(x).AsInt{{.WxC}}()
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return b.Xor(signs).Greater(a.Xor(signs))
}
// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}()
return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
`)
var unsafePATemplate = templateOf("unsafe PA helper", ` var unsafePATemplate = templateOf("unsafe PA helper", `
// pa{{.Vec}} returns a type-unsafe pointer to array that can // pa{{.Vec}} returns a type-unsafe pointer to array that can
// only be used with partial load/store operations that only // only be used with partial load/store operations that only
@ -591,6 +725,8 @@ func main() {
avx2SmallLoadSlicePartTemplate, avx2SmallLoadSlicePartTemplate,
avx2MaskedTemplate, avx2MaskedTemplate,
avx512MaskedTemplate, avx512MaskedTemplate,
avx2SignedComparisonsTemplate,
avx2UnsignedComparisonsTemplate,
broadcastTemplate, broadcastTemplate,
) )
} }

View file

@ -3822,61 +3822,21 @@ func (x Float64x4) Greater(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512 // Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Greater(y Float64x8) Mask64x8 func (x Float64x8) Greater(y Float64x8) Mask64x8
// Greater compares for greater than.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x16) Greater(y Uint8x16) Mask8x16
// Greater compares for greater than.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x32) Greater(y Uint8x32) Mask8x32
// Greater compares for greater than. // Greater compares for greater than.
// //
// Asm: VPCMPUB, CPU Feature: AVX512 // Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Greater(y Uint8x64) Mask8x64 func (x Uint8x64) Greater(y Uint8x64) Mask8x64
// Greater compares for greater than.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x8) Greater(y Uint16x8) Mask16x8
// Greater compares for greater than.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x16) Greater(y Uint16x16) Mask16x16
// Greater compares for greater than. // Greater compares for greater than.
// //
// Asm: VPCMPUW, CPU Feature: AVX512 // Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Greater(y Uint16x32) Mask16x32 func (x Uint16x32) Greater(y Uint16x32) Mask16x32
// Greater compares for greater than.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x4) Greater(y Uint32x4) Mask32x4
// Greater compares for greater than.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x8) Greater(y Uint32x8) Mask32x8
// Greater compares for greater than. // Greater compares for greater than.
// //
// Asm: VPCMPUD, CPU Feature: AVX512 // Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Greater(y Uint32x16) Mask32x16 func (x Uint32x16) Greater(y Uint32x16) Mask32x16
// Greater compares for greater than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x2) Greater(y Uint64x2) Mask64x2
// Greater compares for greater than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x4) Greater(y Uint64x4) Mask64x4
// Greater compares for greater than. // Greater compares for greater than.
// //
// Asm: VPCMPUQ, CPU Feature: AVX512 // Asm: VPCMPUQ, CPU Feature: AVX512
@ -3914,121 +3874,41 @@ func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512 // Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8 func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPB, CPU Feature: AVX512 // Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64 func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPW, CPU Feature: AVX512 // Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32 func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPD, CPU Feature: AVX512 // Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16 func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPQ, CPU Feature: AVX512 // Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8 func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPUB, CPU Feature: AVX512 // Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64 func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPUW, CPU Feature: AVX512 // Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32 func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPUD, CPU Feature: AVX512 // Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
// GreaterEqual compares for greater than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
// GreaterEqual compares for greater than or equal. // GreaterEqual compares for greater than or equal.
// //
// Asm: VPCMPUQ, CPU Feature: AVX512 // Asm: VPCMPUQ, CPU Feature: AVX512
@ -4566,121 +4446,41 @@ func (x Float64x4) Less(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512 // Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Less(y Float64x8) Mask64x8 func (x Float64x8) Less(y Float64x8) Mask64x8
// Less compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x16) Less(y Int8x16) Mask8x16
// Less compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x32) Less(y Int8x32) Mask8x32
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPB, CPU Feature: AVX512 // Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) Less(y Int8x64) Mask8x64 func (x Int8x64) Less(y Int8x64) Mask8x64
// Less compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x8) Less(y Int16x8) Mask16x8
// Less compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x16) Less(y Int16x16) Mask16x16
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPW, CPU Feature: AVX512 // Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) Less(y Int16x32) Mask16x32 func (x Int16x32) Less(y Int16x32) Mask16x32
// Less compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x4) Less(y Int32x4) Mask32x4
// Less compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x8) Less(y Int32x8) Mask32x8
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPD, CPU Feature: AVX512 // Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) Less(y Int32x16) Mask32x16 func (x Int32x16) Less(y Int32x16) Mask32x16
// Less compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x2) Less(y Int64x2) Mask64x2
// Less compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x4) Less(y Int64x4) Mask64x4
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPQ, CPU Feature: AVX512 // Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) Less(y Int64x8) Mask64x8 func (x Int64x8) Less(y Int64x8) Mask64x8
// Less compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x16) Less(y Uint8x16) Mask8x16
// Less compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x32) Less(y Uint8x32) Mask8x32
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPUB, CPU Feature: AVX512 // Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Less(y Uint8x64) Mask8x64 func (x Uint8x64) Less(y Uint8x64) Mask8x64
// Less compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x8) Less(y Uint16x8) Mask16x8
// Less compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x16) Less(y Uint16x16) Mask16x16
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPUW, CPU Feature: AVX512 // Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Less(y Uint16x32) Mask16x32 func (x Uint16x32) Less(y Uint16x32) Mask16x32
// Less compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x4) Less(y Uint32x4) Mask32x4
// Less compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x8) Less(y Uint32x8) Mask32x8
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPUD, CPU Feature: AVX512 // Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Less(y Uint32x16) Mask32x16 func (x Uint32x16) Less(y Uint32x16) Mask32x16
// Less compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x2) Less(y Uint64x2) Mask64x2
// Less compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x4) Less(y Uint64x4) Mask64x4
// Less compares for less than. // Less compares for less than.
// //
// Asm: VPCMPUQ, CPU Feature: AVX512 // Asm: VPCMPUQ, CPU Feature: AVX512
@ -4718,121 +4518,41 @@ func (x Float64x4) LessEqual(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512 // Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) LessEqual(y Float64x8) Mask64x8 func (x Float64x8) LessEqual(y Float64x8) Mask64x8
// LessEqual compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x16) LessEqual(y Int8x16) Mask8x16
// LessEqual compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x32) LessEqual(y Int8x32) Mask8x32
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPB, CPU Feature: AVX512 // Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) LessEqual(y Int8x64) Mask8x64 func (x Int8x64) LessEqual(y Int8x64) Mask8x64
// LessEqual compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x8) LessEqual(y Int16x8) Mask16x8
// LessEqual compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x16) LessEqual(y Int16x16) Mask16x16
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPW, CPU Feature: AVX512 // Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) LessEqual(y Int16x32) Mask16x32 func (x Int16x32) LessEqual(y Int16x32) Mask16x32
// LessEqual compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x4) LessEqual(y Int32x4) Mask32x4
// LessEqual compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x8) LessEqual(y Int32x8) Mask32x8
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPD, CPU Feature: AVX512 // Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) LessEqual(y Int32x16) Mask32x16 func (x Int32x16) LessEqual(y Int32x16) Mask32x16
// LessEqual compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x2) LessEqual(y Int64x2) Mask64x2
// LessEqual compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x4) LessEqual(y Int64x4) Mask64x4
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPQ, CPU Feature: AVX512 // Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) LessEqual(y Int64x8) Mask64x8 func (x Int64x8) LessEqual(y Int64x8) Mask64x8
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPUB, CPU Feature: AVX512 // Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPUW, CPU Feature: AVX512 // Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPUD, CPU Feature: AVX512 // Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
// LessEqual compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
// LessEqual compares for less than or equal. // LessEqual compares for less than or equal.
// //
// Asm: VPCMPUQ, CPU Feature: AVX512 // Asm: VPCMPUQ, CPU Feature: AVX512
@ -6644,121 +6364,41 @@ func (x Float64x4) NotEqual(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512 // Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) NotEqual(y Float64x8) Mask64x8 func (x Float64x8) NotEqual(y Float64x8) Mask64x8
// NotEqual compares for inequality.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x16) NotEqual(y Int8x16) Mask8x16
// NotEqual compares for inequality.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x32) NotEqual(y Int8x32) Mask8x32
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPB, CPU Feature: AVX512 // Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) NotEqual(y Int8x64) Mask8x64 func (x Int8x64) NotEqual(y Int8x64) Mask8x64
// NotEqual compares for inequality.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x8) NotEqual(y Int16x8) Mask16x8
// NotEqual compares for inequality.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x16) NotEqual(y Int16x16) Mask16x16
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPW, CPU Feature: AVX512 // Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) NotEqual(y Int16x32) Mask16x32 func (x Int16x32) NotEqual(y Int16x32) Mask16x32
// NotEqual compares for inequality.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x4) NotEqual(y Int32x4) Mask32x4
// NotEqual compares for inequality.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x8) NotEqual(y Int32x8) Mask32x8
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPD, CPU Feature: AVX512 // Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) NotEqual(y Int32x16) Mask32x16 func (x Int32x16) NotEqual(y Int32x16) Mask32x16
// NotEqual compares for inequality.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x2) NotEqual(y Int64x2) Mask64x2
// NotEqual compares for inequality.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x4) NotEqual(y Int64x4) Mask64x4
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPQ, CPU Feature: AVX512 // Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) NotEqual(y Int64x8) Mask64x8 func (x Int64x8) NotEqual(y Int64x8) Mask64x8
// NotEqual compares for inequality.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
// NotEqual compares for inequality.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPUB, CPU Feature: AVX512 // Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64 func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
// NotEqual compares for inequality.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
// NotEqual compares for inequality.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPUW, CPU Feature: AVX512 // Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32 func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
// NotEqual compares for inequality.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
// NotEqual compares for inequality.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPUD, CPU Feature: AVX512 // Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
// NotEqual compares for inequality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
// NotEqual compares for inequality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
// NotEqual compares for inequality. // NotEqual compares for inequality.
// //
// Asm: VPCMPUQ, CPU Feature: AVX512 // Asm: VPCMPUQ, CPU Feature: AVX512

View file

@ -1500,6 +1500,642 @@ func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
return iy.blendMasked(ix, mask).AsFloat64x8() return iy.blendMasked(ix, mask).AsFloat64x8()
} }
// Less reports, element by element, whether x < y.
// It is evaluated as y.Greater(x), i.e. Greater with its operands swapped.
//
// Emulated, CPU Feature AVX
func (x Int8x16) Less(y Int8x16) Mask8x16 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, element by element, whether x >= y.
// It is evaluated as the complement of y.Greater(x): the comparison result
// is XORed with an all-ones vector obtained from x.Equal(x).
//
// Emulated, CPU Feature AVX
func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
	allSet := x.Equal(x).AsInt8x16()
	less := y.Greater(x).AsInt8x16()
	return less.Xor(allSet).AsMask8x16()
}
// LessEqual reports, element by element, whether x <= y.
// It is evaluated as the complement of x.Greater(y): the comparison result
// is XORed with an all-ones vector obtained from x.Equal(x).
//
// Emulated, CPU Feature AVX
func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
	allSet := x.Equal(x).AsInt8x16()
	greater := x.Greater(y).AsInt8x16()
	return greater.Xor(allSet).AsMask8x16()
}
// NotEqual reports, element by element, whether x != y.
// It is evaluated as the complement of x.Equal(y): the comparison result
// is XORed with an all-ones vector obtained from x.Equal(x).
//
// Emulated, CPU Feature AVX
func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
	allSet := x.Equal(x).AsInt8x16()
	same := x.Equal(y).AsInt8x16()
	return same.Xor(allSet).AsMask8x16()
}
// Less reports, element by element, whether x < y.
// It is evaluated as y.Greater(x), i.e. Greater with its operands swapped.
//
// Emulated, CPU Feature AVX
func (x Int16x8) Less(y Int16x8) Mask16x8 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, element by element, whether x >= y.
// It is evaluated as the complement of y.Greater(x): the comparison result
// is XORed with an all-ones vector obtained from x.Equal(x).
//
// Emulated, CPU Feature AVX
func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	less := y.Greater(x).AsInt16x8()
	return less.Xor(allSet).AsMask16x8()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	greater := x.Greater(y).AsInt16x8()
	return greater.Xor(allSet).AsMask16x8()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	same := x.Equal(y).AsInt16x8()
	return same.Xor(allSet).AsMask16x8()
}
// Less reports, lane by lane, whether x < y.
// It is evaluated as the operand-swapped comparison y > x.
//
// Emulated, CPU Feature AVX
func (x Int32x4) Less(y Int32x4) Mask32x4 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, lane by lane, whether x >= y.
// It is evaluated as the complement of y > x; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	less := y.Greater(x).AsInt32x4()
	return less.Xor(allSet).AsMask32x4()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	greater := x.Greater(y).AsInt32x4()
	return greater.Xor(allSet).AsMask32x4()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	same := x.Equal(y).AsInt32x4()
	return same.Xor(allSet).AsMask32x4()
}
// Less reports, lane by lane, whether x < y.
// It is evaluated as the operand-swapped comparison y > x.
//
// Emulated, CPU Feature AVX
func (x Int64x2) Less(y Int64x2) Mask64x2 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, lane by lane, whether x >= y.
// It is evaluated as the complement of y > x; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	less := y.Greater(x).AsInt64x2()
	return less.Xor(allSet).AsMask64x2()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	greater := x.Greater(y).AsInt64x2()
	return greater.Xor(allSet).AsMask64x2()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX
func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	same := x.Equal(y).AsInt64x2()
	return same.Xor(allSet).AsMask64x2()
}
// Less reports, lane by lane, whether x < y.
// It is evaluated as the operand-swapped comparison y > x.
//
// Emulated, CPU Feature AVX2
func (x Int8x32) Less(y Int8x32) Mask8x32 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, lane by lane, whether x >= y.
// It is evaluated as the complement of y > x; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
	allSet := x.Equal(x).AsInt8x32()
	less := y.Greater(x).AsInt8x32()
	return less.Xor(allSet).AsMask8x32()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
	allSet := x.Equal(x).AsInt8x32()
	greater := x.Greater(y).AsInt8x32()
	return greater.Xor(allSet).AsMask8x32()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
	allSet := x.Equal(x).AsInt8x32()
	same := x.Equal(y).AsInt8x32()
	return same.Xor(allSet).AsMask8x32()
}
// Less reports, lane by lane, whether x < y.
// It is evaluated as the operand-swapped comparison y > x.
//
// Emulated, CPU Feature AVX2
func (x Int16x16) Less(y Int16x16) Mask16x16 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, lane by lane, whether x >= y.
// It is evaluated as the complement of y > x; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	less := y.Greater(x).AsInt16x16()
	return less.Xor(allSet).AsMask16x16()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	greater := x.Greater(y).AsInt16x16()
	return greater.Xor(allSet).AsMask16x16()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	same := x.Equal(y).AsInt16x16()
	return same.Xor(allSet).AsMask16x16()
}
// Less reports, lane by lane, whether x < y.
// It is evaluated as the operand-swapped comparison y > x.
//
// Emulated, CPU Feature AVX2
func (x Int32x8) Less(y Int32x8) Mask32x8 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, lane by lane, whether x >= y.
// It is evaluated as the complement of y > x; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	less := y.Greater(x).AsInt32x8()
	return less.Xor(allSet).AsMask32x8()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	greater := x.Greater(y).AsInt32x8()
	return greater.Xor(allSet).AsMask32x8()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	same := x.Equal(y).AsInt32x8()
	return same.Xor(allSet).AsMask32x8()
}
// Less reports, lane by lane, whether x < y.
// It is evaluated as the operand-swapped comparison y > x.
//
// Emulated, CPU Feature AVX2
func (x Int64x4) Less(y Int64x4) Mask64x4 {
	swapped := y.Greater(x)
	return swapped
}
// GreaterEqual reports, lane by lane, whether x >= y.
// It is evaluated as the complement of y > x; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	less := y.Greater(x).AsInt64x4()
	return less.Xor(allSet).AsMask64x4()
}
// LessEqual reports, lane by lane, whether x <= y.
// It is evaluated as the complement of x > y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	greater := x.Greater(y).AsInt64x4()
	return greater.Xor(allSet).AsMask64x4()
}
// NotEqual reports, lane by lane, whether x != y.
// It is evaluated as the complement of x == y; the complement is an
// XOR with the all-ones vector obtained by comparing x with itself.
//
// Emulated, CPU Feature AVX2
func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	same := x.Equal(y).AsInt64x4()
	return same.Xor(allSet).AsMask64x4()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with a broadcast
// -1<<7) and are then compared as signed values.
//
// Emulated, CPU Feature AVX2
func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
	signBit := BroadcastInt8x16(-1 << (8 - 1))
	xs := x.AsInt8x16().Xor(signBit)
	ys := y.AsInt8x16().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with a broadcast
// -1<<7) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX2
func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
	signBit := BroadcastInt8x16(-1 << (8 - 1))
	xs := x.AsInt8x16().Xor(signBit)
	ys := y.AsInt8x16().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
	allSet := x.Equal(x).AsInt8x16()
	signBit := BroadcastInt8x16(-1 << (8 - 1))
	xs := x.AsInt8x16().Xor(signBit)
	ys := y.AsInt8x16().Xor(signBit)
	less := ys.Greater(xs).AsInt8x16()
	return less.Xor(allSet).AsMask8x16()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
	allSet := x.Equal(x).AsInt8x16()
	signBit := BroadcastInt8x16(-1 << (8 - 1))
	xs := x.AsInt8x16().Xor(signBit)
	ys := y.AsInt8x16().Xor(signBit)
	greater := xs.Greater(ys).AsInt8x16()
	return greater.Xor(allSet).AsMask8x16()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX
func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
	allSet := x.Equal(x).AsInt8x16()
	same := x.AsInt8x16().Equal(y.AsInt8x16()).AsInt8x16()
	return same.Xor(allSet).AsMask8x16()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 15) and are then compared as signed values.
//
// Emulated, CPU Feature AVX
func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x8().Xor(signBit)
	ys := y.AsInt16x8().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 15) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX
func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x8().Xor(signBit)
	ys := y.AsInt16x8().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX
func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x8().Xor(signBit)
	ys := y.AsInt16x8().Xor(signBit)
	less := ys.Greater(xs).AsInt16x8()
	return less.Xor(allSet).AsMask16x8()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX
func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x8().Xor(signBit)
	ys := y.AsInt16x8().Xor(signBit)
	greater := xs.Greater(ys).AsInt16x8()
	return greater.Xor(allSet).AsMask16x8()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX
func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
	allSet := x.Equal(x).AsInt16x8()
	same := x.AsInt16x8().Equal(y.AsInt16x8()).AsInt16x8()
	return same.Xor(allSet).AsMask16x8()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 31) and are then compared as signed values.
//
// Emulated, CPU Feature AVX
func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x4().Xor(signBit)
	ys := y.AsInt32x4().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 31) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX
func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x4().Xor(signBit)
	ys := y.AsInt32x4().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX
func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x4().Xor(signBit)
	ys := y.AsInt32x4().Xor(signBit)
	less := ys.Greater(xs).AsInt32x4()
	return less.Xor(allSet).AsMask32x4()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX
func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x4().Xor(signBit)
	ys := y.AsInt32x4().Xor(signBit)
	greater := xs.Greater(ys).AsInt32x4()
	return greater.Xor(allSet).AsMask32x4()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX
func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
	allSet := x.Equal(x).AsInt32x4()
	same := x.AsInt32x4().Equal(y.AsInt32x4()).AsInt32x4()
	return same.Xor(allSet).AsMask32x4()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 63) and are then compared as signed values.
//
// Emulated, CPU Feature AVX
func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x2().Xor(signBit)
	ys := y.AsInt64x2().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 63) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX
func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x2().Xor(signBit)
	ys := y.AsInt64x2().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX
func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x2().Xor(signBit)
	ys := y.AsInt64x2().Xor(signBit)
	less := ys.Greater(xs).AsInt64x2()
	return less.Xor(allSet).AsMask64x2()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x2().Xor(signBit)
	ys := y.AsInt64x2().Xor(signBit)
	greater := xs.Greater(ys).AsInt64x2()
	return greater.Xor(allSet).AsMask64x2()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX
func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
	allSet := x.Equal(x).AsInt64x2()
	same := x.AsInt64x2().Equal(y.AsInt64x2()).AsInt64x2()
	return same.Xor(allSet).AsMask64x2()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with a broadcast
// -1<<7) and are then compared as signed values.
//
// Emulated, CPU Feature AVX2
func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
	signBit := BroadcastInt8x32(-1 << (8 - 1))
	xs := x.AsInt8x32().Xor(signBit)
	ys := y.AsInt8x32().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with a broadcast
// -1<<7) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX2
func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
	signBit := BroadcastInt8x32(-1 << (8 - 1))
	xs := x.AsInt8x32().Xor(signBit)
	ys := y.AsInt8x32().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
	allSet := x.Equal(x).AsInt8x32()
	signBit := BroadcastInt8x32(-1 << (8 - 1))
	xs := x.AsInt8x32().Xor(signBit)
	ys := y.AsInt8x32().Xor(signBit)
	less := ys.Greater(xs).AsInt8x32()
	return less.Xor(allSet).AsMask8x32()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
	allSet := x.Equal(x).AsInt8x32()
	signBit := BroadcastInt8x32(-1 << (8 - 1))
	xs := x.AsInt8x32().Xor(signBit)
	ys := y.AsInt8x32().Xor(signBit)
	greater := xs.Greater(ys).AsInt8x32()
	return greater.Xor(allSet).AsMask8x32()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX2
func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
	allSet := x.Equal(x).AsInt8x32()
	same := x.AsInt8x32().Equal(y.AsInt8x32()).AsInt8x32()
	return same.Xor(allSet).AsMask8x32()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 15) and are then compared as signed values.
//
// Emulated, CPU Feature AVX2
func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x16().Xor(signBit)
	ys := y.AsInt16x16().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 15) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX2
func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x16().Xor(signBit)
	ys := y.AsInt16x16().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x16().Xor(signBit)
	ys := y.AsInt16x16().Xor(signBit)
	less := ys.Greater(xs).AsInt16x16()
	return less.Xor(allSet).AsMask16x16()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	signBit := allSet.ShiftAllLeft(16 - 1)
	xs := x.AsInt16x16().Xor(signBit)
	ys := y.AsInt16x16().Xor(signBit)
	greater := xs.Greater(ys).AsInt16x16()
	return greater.Xor(allSet).AsMask16x16()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX2
func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
	allSet := x.Equal(x).AsInt16x16()
	same := x.AsInt16x16().Equal(y.AsInt16x16()).AsInt16x16()
	return same.Xor(allSet).AsMask16x16()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 31) and are then compared as signed values.
//
// Emulated, CPU Feature AVX2
func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x8().Xor(signBit)
	ys := y.AsInt32x8().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 31) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX2
func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x8().Xor(signBit)
	ys := y.AsInt32x8().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x8().Xor(signBit)
	ys := y.AsInt32x8().Xor(signBit)
	less := ys.Greater(xs).AsInt32x8()
	return less.Xor(allSet).AsMask32x8()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	signBit := allSet.ShiftAllLeft(32 - 1)
	xs := x.AsInt32x8().Xor(signBit)
	ys := y.AsInt32x8().Xor(signBit)
	greater := xs.Greater(ys).AsInt32x8()
	return greater.Xor(allSet).AsMask32x8()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX2
func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
	allSet := x.Equal(x).AsInt32x8()
	same := x.AsInt32x8().Equal(y.AsInt32x8()).AsInt32x8()
	return same.Xor(allSet).AsMask32x8()
}
// Greater reports, lane by lane, whether x > y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 63) and are then compared as signed values.
//
// Emulated, CPU Feature AVX2
func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x4().Xor(signBit)
	ys := y.AsInt64x4().Xor(signBit)
	return xs.Greater(ys)
}
// Less reports, lane by lane, whether x < y for unsigned lanes.
// Both operands have their top bit flipped (XOR with all-ones shifted
// left by 63) and the signed comparison is made with the operands swapped.
//
// Emulated, CPU Feature AVX2
func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x4().Xor(signBit)
	ys := y.AsInt64x4().Xor(signBit)
	return ys.Greater(xs)
}
// GreaterEqual reports, lane by lane, whether x >= y for unsigned lanes.
// It is the complement of the unsigned y > x, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x4().Xor(signBit)
	ys := y.AsInt64x4().Xor(signBit)
	less := ys.Greater(xs).AsInt64x4()
	return less.Xor(allSet).AsMask64x4()
}
// LessEqual reports, lane by lane, whether x <= y for unsigned lanes.
// It is the complement of the unsigned x > y, which is computed by
// flipping the top bit of both operands and comparing them as signed.
//
// Emulated, CPU Feature AVX2
func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	signBit := allSet.ShiftAllLeft(64 - 1)
	xs := x.AsInt64x4().Xor(signBit)
	ys := y.AsInt64x4().Xor(signBit)
	greater := xs.Greater(ys).AsInt64x4()
	return greater.Xor(allSet).AsMask64x4()
}
// NotEqual reports, lane by lane, whether x != y.
// The operands are reinterpreted as signed vectors, compared for
// equality, and the result is complemented by XOR with all-ones.
//
// Emulated, CPU Feature AVX2
func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
	allSet := x.Equal(x).AsInt64x4()
	same := x.AsInt64x4().Equal(y.AsInt64x4()).AsInt64x4()
	return same.Xor(allSet).AsMask64x4()
}
// BroadcastInt8x16 returns a vector with the input // BroadcastInt8x16 returns a vector with the input
// x assigned to all elements of the output. // x assigned to all elements of the output.
// //