mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd: add emulations for missing AVX2 comparisons
This also removes the AVX512 versions of the operations that would use the same names but would not run on AVX2-only hardware.

Includes files generated by simdgen CL 692355.

Change-Id: Iff29042245b7688133fed49a03e681e85235b8a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/692335
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
ddb689c7bb
commit
2080415aa2
11 changed files with 855 additions and 2827 deletions
|
|
@ -886,29 +886,13 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
|
||||
case ssa.OpAMD64VCMPPS512,
|
||||
ssa.OpAMD64VCMPPD512,
|
||||
ssa.OpAMD64VPCMPUB128,
|
||||
ssa.OpAMD64VPCMPUB256,
|
||||
ssa.OpAMD64VPCMPUB512,
|
||||
ssa.OpAMD64VPCMPUW128,
|
||||
ssa.OpAMD64VPCMPUW256,
|
||||
ssa.OpAMD64VPCMPUW512,
|
||||
ssa.OpAMD64VPCMPUD128,
|
||||
ssa.OpAMD64VPCMPUD256,
|
||||
ssa.OpAMD64VPCMPUD512,
|
||||
ssa.OpAMD64VPCMPUQ128,
|
||||
ssa.OpAMD64VPCMPUQ256,
|
||||
ssa.OpAMD64VPCMPUQ512,
|
||||
ssa.OpAMD64VPCMPB128,
|
||||
ssa.OpAMD64VPCMPB256,
|
||||
ssa.OpAMD64VPCMPB512,
|
||||
ssa.OpAMD64VPCMPW128,
|
||||
ssa.OpAMD64VPCMPW256,
|
||||
ssa.OpAMD64VPCMPW512,
|
||||
ssa.OpAMD64VPCMPD128,
|
||||
ssa.OpAMD64VPCMPD256,
|
||||
ssa.OpAMD64VPCMPD512,
|
||||
ssa.OpAMD64VPCMPQ128,
|
||||
ssa.OpAMD64VPCMPQ256,
|
||||
ssa.OpAMD64VPCMPQ512:
|
||||
p = simdV2kImm8(s, v)
|
||||
|
||||
|
|
|
|||
|
|
@ -590,17 +590,9 @@
|
|||
(GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
|
||||
(GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
|
||||
(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
|
||||
(GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y))
|
||||
(GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [14] x y))
|
||||
(GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
|
||||
(GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [14] x y))
|
||||
(GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [14] x y))
|
||||
(GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [14] x y))
|
||||
(GreaterUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [14] x y))
|
||||
(GreaterUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [14] x y))
|
||||
(GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [14] x y))
|
||||
(GreaterUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y))
|
||||
(GreaterUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y))
|
||||
(GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y))
|
||||
(GreaterEqualFloat32x4 x y) => (VCMPPS128 [13] x y)
|
||||
(GreaterEqualFloat32x8 x y) => (VCMPPS256 [13] x y)
|
||||
|
|
@ -608,29 +600,13 @@
|
|||
(GreaterEqualFloat64x2 x y) => (VCMPPD128 [13] x y)
|
||||
(GreaterEqualFloat64x4 x y) => (VCMPPD256 [13] x y)
|
||||
(GreaterEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [13] x y))
|
||||
(GreaterEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [13] x y))
|
||||
(GreaterEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [13] x y))
|
||||
(GreaterEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [13] x y))
|
||||
(GreaterEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [13] x y))
|
||||
(GreaterEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [13] x y))
|
||||
(GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [13] x y))
|
||||
(GreaterEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [13] x y))
|
||||
(GreaterEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [13] x y))
|
||||
(GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [13] x y))
|
||||
(GreaterEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [13] x y))
|
||||
(GreaterEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [13] x y))
|
||||
(GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [13] x y))
|
||||
(GreaterEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [13] x y))
|
||||
(GreaterEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [13] x y))
|
||||
(GreaterEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [13] x y))
|
||||
(GreaterEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [13] x y))
|
||||
(GreaterEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [13] x y))
|
||||
(GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
|
||||
(GreaterEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [13] x y))
|
||||
(GreaterEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [13] x y))
|
||||
(GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
|
||||
(GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y))
|
||||
(GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y))
|
||||
(GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
|
||||
(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
|
||||
(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
|
||||
|
|
@ -710,29 +686,13 @@
|
|||
(LessFloat64x2 x y) => (VCMPPD128 [1] x y)
|
||||
(LessFloat64x4 x y) => (VCMPPD256 [1] x y)
|
||||
(LessFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [1] x y))
|
||||
(LessInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [1] x y))
|
||||
(LessInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [1] x y))
|
||||
(LessInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [1] x y))
|
||||
(LessInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [1] x y))
|
||||
(LessInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [1] x y))
|
||||
(LessInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [1] x y))
|
||||
(LessInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [1] x y))
|
||||
(LessInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [1] x y))
|
||||
(LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y))
|
||||
(LessInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [1] x y))
|
||||
(LessInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [1] x y))
|
||||
(LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y))
|
||||
(LessUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [1] x y))
|
||||
(LessUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [1] x y))
|
||||
(LessUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [1] x y))
|
||||
(LessUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [1] x y))
|
||||
(LessUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [1] x y))
|
||||
(LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y))
|
||||
(LessUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [1] x y))
|
||||
(LessUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [1] x y))
|
||||
(LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y))
|
||||
(LessUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y))
|
||||
(LessUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y))
|
||||
(LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y))
|
||||
(LessEqualFloat32x4 x y) => (VCMPPS128 [2] x y)
|
||||
(LessEqualFloat32x8 x y) => (VCMPPS256 [2] x y)
|
||||
|
|
@ -740,29 +700,13 @@
|
|||
(LessEqualFloat64x2 x y) => (VCMPPD128 [2] x y)
|
||||
(LessEqualFloat64x4 x y) => (VCMPPD256 [2] x y)
|
||||
(LessEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [2] x y))
|
||||
(LessEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [2] x y))
|
||||
(LessEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [2] x y))
|
||||
(LessEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [2] x y))
|
||||
(LessEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [2] x y))
|
||||
(LessEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [2] x y))
|
||||
(LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y))
|
||||
(LessEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [2] x y))
|
||||
(LessEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [2] x y))
|
||||
(LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y))
|
||||
(LessEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [2] x y))
|
||||
(LessEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [2] x y))
|
||||
(LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y))
|
||||
(LessEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [2] x y))
|
||||
(LessEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [2] x y))
|
||||
(LessEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [2] x y))
|
||||
(LessEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [2] x y))
|
||||
(LessEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [2] x y))
|
||||
(LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
|
||||
(LessEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [2] x y))
|
||||
(LessEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [2] x y))
|
||||
(LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
|
||||
(LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y))
|
||||
(LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y))
|
||||
(LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
|
||||
(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
|
||||
(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
|
||||
|
|
@ -1050,29 +994,13 @@
|
|||
(NotEqualFloat64x2 x y) => (VCMPPD128 [4] x y)
|
||||
(NotEqualFloat64x4 x y) => (VCMPPD256 [4] x y)
|
||||
(NotEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
|
||||
(NotEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [4] x y))
|
||||
(NotEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [4] x y))
|
||||
(NotEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
|
||||
(NotEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [4] x y))
|
||||
(NotEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [4] x y))
|
||||
(NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
|
||||
(NotEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [4] x y))
|
||||
(NotEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [4] x y))
|
||||
(NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
|
||||
(NotEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [4] x y))
|
||||
(NotEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [4] x y))
|
||||
(NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
|
||||
(NotEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [4] x y))
|
||||
(NotEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [4] x y))
|
||||
(NotEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
|
||||
(NotEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [4] x y))
|
||||
(NotEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [4] x y))
|
||||
(NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
|
||||
(NotEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [4] x y))
|
||||
(NotEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [4] x y))
|
||||
(NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
|
||||
(NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y))
|
||||
(NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y))
|
||||
(NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
|
||||
(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
|
||||
(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
|
||||
|
|
|
|||
|
|
@ -986,29 +986,13 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPB128", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPB256", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPW128", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPW256", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPD128", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPD256", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPQ128", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPQ256", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPROLD128", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPROLD256", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -514,17 +514,9 @@ func simdGenericOps() []opData {
|
|||
{name: "GreaterEqualFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualFloat64x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt8x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt8x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt8x64", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt16x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt16x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt16x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt32x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt32x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt32x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt64x2", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt64x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualInt64x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false},
|
||||
{name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false},
|
||||
|
|
@ -556,17 +548,9 @@ func simdGenericOps() []opData {
|
|||
{name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false},
|
||||
{name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false},
|
||||
{name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false},
|
||||
{name: "GreaterEqualUint8x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint8x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint8x64", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint16x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint16x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint16x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint32x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint32x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint32x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint64x2", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint64x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterEqualUint64x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterFloat32x8", argLength: 2, commutative: false},
|
||||
|
|
@ -616,17 +600,9 @@ func simdGenericOps() []opData {
|
|||
{name: "GreaterMaskedUint64x2", argLength: 3, commutative: false},
|
||||
{name: "GreaterMaskedUint64x4", argLength: 3, commutative: false},
|
||||
{name: "GreaterMaskedUint64x8", argLength: 3, commutative: false},
|
||||
{name: "GreaterUint8x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint8x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint8x64", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint16x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint16x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint16x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint32x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint32x8", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint32x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint64x2", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint64x4", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint64x8", argLength: 2, commutative: false},
|
||||
{name: "IsNanFloat32x4", argLength: 2, commutative: true},
|
||||
{name: "IsNanFloat32x8", argLength: 2, commutative: true},
|
||||
|
|
@ -646,17 +622,9 @@ func simdGenericOps() []opData {
|
|||
{name: "LessEqualFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "LessEqualFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "LessEqualFloat64x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt8x16", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt8x32", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt8x64", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt16x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt16x16", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt16x32", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt32x4", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt32x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt32x16", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt64x2", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt64x4", argLength: 2, commutative: false},
|
||||
{name: "LessEqualInt64x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false},
|
||||
{name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false},
|
||||
|
|
@ -688,17 +656,9 @@ func simdGenericOps() []opData {
|
|||
{name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false},
|
||||
{name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false},
|
||||
{name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false},
|
||||
{name: "LessEqualUint8x16", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint8x32", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint8x64", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint16x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint16x16", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint16x32", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint32x4", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint32x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint32x16", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint64x2", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint64x4", argLength: 2, commutative: false},
|
||||
{name: "LessEqualUint64x8", argLength: 2, commutative: false},
|
||||
{name: "LessFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "LessFloat32x8", argLength: 2, commutative: false},
|
||||
|
|
@ -706,17 +666,9 @@ func simdGenericOps() []opData {
|
|||
{name: "LessFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "LessFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "LessFloat64x8", argLength: 2, commutative: false},
|
||||
{name: "LessInt8x16", argLength: 2, commutative: false},
|
||||
{name: "LessInt8x32", argLength: 2, commutative: false},
|
||||
{name: "LessInt8x64", argLength: 2, commutative: false},
|
||||
{name: "LessInt16x8", argLength: 2, commutative: false},
|
||||
{name: "LessInt16x16", argLength: 2, commutative: false},
|
||||
{name: "LessInt16x32", argLength: 2, commutative: false},
|
||||
{name: "LessInt32x4", argLength: 2, commutative: false},
|
||||
{name: "LessInt32x8", argLength: 2, commutative: false},
|
||||
{name: "LessInt32x16", argLength: 2, commutative: false},
|
||||
{name: "LessInt64x2", argLength: 2, commutative: false},
|
||||
{name: "LessInt64x4", argLength: 2, commutative: false},
|
||||
{name: "LessInt64x8", argLength: 2, commutative: false},
|
||||
{name: "LessMaskedFloat32x4", argLength: 3, commutative: false},
|
||||
{name: "LessMaskedFloat32x8", argLength: 3, commutative: false},
|
||||
|
|
@ -748,17 +700,9 @@ func simdGenericOps() []opData {
|
|||
{name: "LessMaskedUint64x2", argLength: 3, commutative: false},
|
||||
{name: "LessMaskedUint64x4", argLength: 3, commutative: false},
|
||||
{name: "LessMaskedUint64x8", argLength: 3, commutative: false},
|
||||
{name: "LessUint8x16", argLength: 2, commutative: false},
|
||||
{name: "LessUint8x32", argLength: 2, commutative: false},
|
||||
{name: "LessUint8x64", argLength: 2, commutative: false},
|
||||
{name: "LessUint16x8", argLength: 2, commutative: false},
|
||||
{name: "LessUint16x16", argLength: 2, commutative: false},
|
||||
{name: "LessUint16x32", argLength: 2, commutative: false},
|
||||
{name: "LessUint32x4", argLength: 2, commutative: false},
|
||||
{name: "LessUint32x8", argLength: 2, commutative: false},
|
||||
{name: "LessUint32x16", argLength: 2, commutative: false},
|
||||
{name: "LessUint64x2", argLength: 2, commutative: false},
|
||||
{name: "LessUint64x4", argLength: 2, commutative: false},
|
||||
{name: "LessUint64x8", argLength: 2, commutative: false},
|
||||
{name: "MaxFloat32x4", argLength: 2, commutative: true},
|
||||
{name: "MaxFloat32x8", argLength: 2, commutative: true},
|
||||
|
|
@ -986,17 +930,9 @@ func simdGenericOps() []opData {
|
|||
{name: "NotEqualFloat64x2", argLength: 2, commutative: true},
|
||||
{name: "NotEqualFloat64x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualFloat64x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt8x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt8x32", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt8x64", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt16x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt16x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt16x32", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt32x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt32x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt32x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt64x2", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt64x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt64x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true},
|
||||
{name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true},
|
||||
|
|
@ -1028,17 +964,9 @@ func simdGenericOps() []opData {
|
|||
{name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
|
||||
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
|
||||
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
|
||||
{name: "NotEqualUint8x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint8x32", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint8x64", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint16x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint16x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint16x32", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint32x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint32x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint32x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint64x2", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint64x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint64x8", argLength: 2, commutative: true},
|
||||
{name: "OnesCountInt8x16", argLength: 1, commutative: false},
|
||||
{name: "OnesCountInt8x32", argLength: 1, commutative: false},
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -602,17 +602,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Greater", opLen2(ssa.OpGreaterUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.Greater", opLen2(ssa.OpGreaterUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Greater", opLen2(ssa.OpGreaterUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.Greater", opLen2(ssa.OpGreaterUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Greater", opLen2(ssa.OpGreaterUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.Greater", opLen2(ssa.OpGreaterUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -620,29 +612,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -722,29 +698,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Less", opLen2(ssa.OpLessInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.Less", opLen2(ssa.OpLessInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Less", opLen2(ssa.OpLessInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.Less", opLen2(ssa.OpLessInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Less", opLen2(ssa.OpLessInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.Less", opLen2(ssa.OpLessInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Less", opLen2(ssa.OpLessInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.Less", opLen2(ssa.OpLessInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Less", opLen2(ssa.OpLessUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.Less", opLen2(ssa.OpLessUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Less", opLen2(ssa.OpLessUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.Less", opLen2(ssa.OpLessUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Less", opLen2(ssa.OpLessUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.Less", opLen2(ssa.OpLessUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Less", opLen2(ssa.OpLessUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.Less", opLen2(ssa.OpLessUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -752,29 +712,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.LessEqual", opLen2(ssa.OpLessEqualInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.LessEqual", opLen2(ssa.OpLessEqualInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.LessEqual", opLen2(ssa.OpLessEqualInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.LessEqual", opLen2(ssa.OpLessEqualInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.LessEqual", opLen2(ssa.OpLessEqualInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.LessEqual", opLen2(ssa.OpLessEqualInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.LessEqual", opLen2(ssa.OpLessEqualInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.LessEqual", opLen2(ssa.OpLessEqualInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.LessEqual", opLen2(ssa.OpLessEqualUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.LessEqual", opLen2(ssa.OpLessEqualUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.LessEqual", opLen2(ssa.OpLessEqualUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.LessEqual", opLen2(ssa.OpLessEqualUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.LessEqual", opLen2(ssa.OpLessEqualUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.LessEqual", opLen2(ssa.OpLessEqualUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -1062,29 +1006,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.NotEqual", opLen2(ssa.OpNotEqualInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.NotEqual", opLen2(ssa.OpNotEqualInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.NotEqual", opLen2(ssa.OpNotEqualInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.NotEqual", opLen2(ssa.OpNotEqualInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.NotEqual", opLen2(ssa.OpNotEqualInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.NotEqual", opLen2(ssa.OpNotEqualInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.NotEqual", opLen2(ssa.OpNotEqualUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.NotEqual", opLen2(ssa.OpNotEqualUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.NotEqual", opLen2(ssa.OpNotEqualUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.NotEqual", opLen2(ssa.OpNotEqualUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.NotEqual", opLen2(ssa.OpNotEqualUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.NotEqual", opLen2(ssa.OpNotEqualUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -59,7 +59,6 @@ func TestLess(t *testing.T) {
|
|||
testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64])
|
||||
testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64])
|
||||
|
||||
if comparisonFixed {
|
||||
testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16])
|
||||
testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16])
|
||||
testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32])
|
||||
|
|
@ -69,7 +68,23 @@ func TestLess(t *testing.T) {
|
|||
testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8])
|
||||
testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8])
|
||||
|
||||
}
|
||||
testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16])
|
||||
testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16])
|
||||
testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32])
|
||||
testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32])
|
||||
testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64])
|
||||
testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64])
|
||||
testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8])
|
||||
testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8])
|
||||
|
||||
testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16])
|
||||
testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16])
|
||||
testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32])
|
||||
testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32])
|
||||
testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64])
|
||||
testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64])
|
||||
testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8])
|
||||
testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8])
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16])
|
||||
|
|
@ -100,7 +115,6 @@ func TestLessEqual(t *testing.T) {
|
|||
testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64])
|
||||
testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64])
|
||||
|
||||
if comparisonFixed {
|
||||
testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16])
|
||||
testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16])
|
||||
testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32])
|
||||
|
|
@ -110,9 +124,6 @@ func TestLessEqual(t *testing.T) {
|
|||
testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8])
|
||||
testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8])
|
||||
|
||||
}
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16])
|
||||
testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16])
|
||||
testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32])
|
||||
|
|
@ -122,6 +133,7 @@ func TestLessEqual(t *testing.T) {
|
|||
testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
|
||||
testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32])
|
||||
testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64])
|
||||
testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8])
|
||||
|
|
@ -151,7 +163,6 @@ func TestGreater(t *testing.T) {
|
|||
testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8])
|
||||
testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8])
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
|
||||
testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
|
||||
testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
|
||||
|
|
@ -162,6 +173,8 @@ func TestGreater(t *testing.T) {
|
|||
testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
|
||||
testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
|
||||
|
||||
if simd.HasAVX512() {
|
||||
|
||||
testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32])
|
||||
testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64])
|
||||
testInt8x64Compare(t, simd.Int8x64.Greater, greaterSlice[int8])
|
||||
|
|
@ -181,7 +194,6 @@ func TestGreaterEqual(t *testing.T) {
|
|||
testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
|
||||
testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
|
||||
|
||||
if comparisonFixed {
|
||||
testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
|
||||
testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
|
||||
testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
|
||||
|
|
@ -191,9 +203,6 @@ func TestGreaterEqual(t *testing.T) {
|
|||
testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
|
||||
testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
|
||||
|
||||
}
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
|
||||
testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
|
||||
testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
|
||||
|
|
@ -203,6 +212,7 @@ func TestGreaterEqual(t *testing.T) {
|
|||
testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
|
||||
testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
|
||||
testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
|
||||
testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
|
||||
|
|
@ -260,7 +270,6 @@ func TestNotEqual(t *testing.T) {
|
|||
testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64])
|
||||
testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64])
|
||||
|
||||
if comparisonFixed {
|
||||
testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
|
||||
testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
|
||||
testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
|
||||
|
|
@ -278,7 +287,6 @@ func TestNotEqual(t *testing.T) {
|
|||
testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
|
||||
testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
|
||||
testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
|
||||
}
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32])
|
||||
|
|
|
|||
|
|
@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
|
|||
floats: []int{32},
|
||||
}
|
||||
|
||||
var avx2SignedComparisons = &shapes{
|
||||
vecs: []int{128, 256},
|
||||
ints: []int{8, 16, 32, 64},
|
||||
}
|
||||
|
||||
var avx2UnsignedComparisons = &shapes{
|
||||
vecs: []int{128, 256},
|
||||
uints: []int{8, 16, 32, 64},
|
||||
}
|
||||
|
||||
type templateData struct {
|
||||
Vec string // the type of the vector, e.g. Float32x4
|
||||
AOrAn string // for documentation, the article "a" or "an"
|
||||
|
|
@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
|
|||
}
|
||||
`)
|
||||
|
||||
func (t templateData) CPUfeature() string {
|
||||
switch t.Vwidth {
|
||||
case 128:
|
||||
return "AVX"
|
||||
case 256:
|
||||
return "AVX2"
|
||||
case 512:
|
||||
return "AVX512"
|
||||
}
|
||||
panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
|
||||
}
|
||||
|
||||
// avx2SignedComparisonsTemplate holds the template text for the emulated
// signed comparisons Less, GreaterEqual, LessEqual and NotEqual, expressed
// through the vector type's own Greater and Equal methods:
//
//   - Less swaps the operands of Greater.
//   - GreaterEqual, LessEqual and NotEqual compute the opposite comparison
//     (y > x, x > y and x == y respectively) and invert it by XOR with the
//     vector named ones, which is x.Equal(x) reinterpreted as integers.
//
// Everything between the backquotes is emitted text, so it must not be
// reformatted. The template is paired with the avx2SignedComparisons shapes;
// presumably shapedTemplateOf expands it once per selected shape (confirm
// against its definition).
var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeature}}
|
||||
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeature}}
|
||||
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeature}}
|
||||
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeature}}
|
||||
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
|
||||
}
|
||||
`)
|
||||
|
||||
// CPUfeatureAVX2if8 return AVX2 if the element width is 8,
|
||||
// otherwise, it returns CPUfeature. This is for the cpufeature
|
||||
// of unsigned comparison emulation, which uses shifts for all
|
||||
// the sizes > 8 (shifts are AVX) but must use broadcast (AVX2)
|
||||
// for bytes.
|
||||
func (t templateData) CPUfeatureAVX2if8() string {
|
||||
if t.Width == 8 {
|
||||
return "AVX2"
|
||||
}
|
||||
return t.CPUfeature()
|
||||
}
|
||||
|
||||
// avx2UnsignedComparisonsTemplate holds the template text for the emulated
// unsigned comparisons Greater, Less, GreaterEqual, LessEqual and NotEqual.
//
// The ordered comparisons reinterpret both operands as signed integer
// vectors (a, b), XOR each with the vector named signs, and then use the
// signed Greater. signs is produced by BroadcastInt of -1 << (Width-1) when
// the element width is 8, and by shifting ones (x.Equal(x) reinterpreted as
// integers) left by Width-1 otherwise. GreaterEqual and LessEqual compute
// the opposite strict comparison and invert it by XOR with ones; NotEqual
// inverts Equal the same way.
//
// Everything between the backquotes is emitted text, so it must not be
// reformatted. The template is paired with the avx2UnsignedComparisons
// shapes; presumably shapedTemplateOf expands it once per selected shape
// (confirm against its definition).
var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
|
||||
func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
|
||||
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
|
||||
{{- if eq .Width 8}}
|
||||
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
|
||||
{{- else}}
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
signs := ones.ShiftAllLeft({{.Width}}-1)
|
||||
{{- end }}
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
|
||||
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
|
||||
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
|
||||
{{- if eq .Width 8}}
|
||||
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
|
||||
{{- else}}
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
signs := ones.ShiftAllLeft({{.Width}}-1)
|
||||
{{- end }}
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
|
||||
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
|
||||
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
{{- if eq .Width 8}}
|
||||
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
|
||||
{{- else}}
|
||||
signs := ones.ShiftAllLeft({{.Width}}-1)
|
||||
{{- end }}
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
|
||||
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
|
||||
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
{{- if eq .Width 8}}
|
||||
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
|
||||
{{- else}}
|
||||
signs := ones.ShiftAllLeft({{.Width}}-1)
|
||||
{{- end }}
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeature}}
|
||||
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
|
||||
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
|
||||
ones := x.Equal(x).AsInt{{.WxC}}()
|
||||
return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
|
||||
}
|
||||
`)
|
||||
|
||||
var unsafePATemplate = templateOf("unsafe PA helper", `
|
||||
// pa{{.Vec}} returns a type-unsafe pointer to array that can
|
||||
// only be used with partial load/store operations that only
|
||||
|
|
@ -591,6 +725,8 @@ func main() {
|
|||
avx2SmallLoadSlicePartTemplate,
|
||||
avx2MaskedTemplate,
|
||||
avx512MaskedTemplate,
|
||||
avx2SignedComparisonsTemplate,
|
||||
avx2UnsignedComparisonsTemplate,
|
||||
broadcastTemplate,
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3822,61 +3822,21 @@ func (x Float64x4) Greater(y Float64x4) Mask64x4
|
|||
// Asm: VCMPPD, CPU Feature: AVX512
|
||||
func (x Float64x8) Greater(y Float64x8) Mask64x8
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Greater(y Uint8x16) Mask8x16
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x32) Greater(y Uint8x32) Mask8x32
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x64) Greater(y Uint8x64) Mask8x64
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Greater(y Uint16x8) Mask16x8
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x16) Greater(y Uint16x16) Mask16x16
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x32) Greater(y Uint16x32) Mask16x32
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Greater(y Uint32x4) Mask32x4
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x8) Greater(y Uint32x8) Mask32x8
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x16) Greater(y Uint32x16) Mask32x16
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Greater(y Uint64x2) Mask64x2
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x4) Greater(y Uint64x4) Mask64x4
|
||||
|
||||
// Greater compares for greater than.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
|
|
@ -3914,121 +3874,41 @@ func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
|
|||
// Asm: VCMPPD, CPU Feature: AVX512
|
||||
func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
|
||||
|
||||
// GreaterEqual compares for greater than or equal.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
|
|
@ -4566,121 +4446,41 @@ func (x Float64x4) Less(y Float64x4) Mask64x4
|
|||
// Asm: VCMPPD, CPU Feature: AVX512
|
||||
func (x Float64x8) Less(y Float64x8) Mask64x8
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x16) Less(y Int8x16) Mask8x16
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x32) Less(y Int8x32) Mask8x32
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x64) Less(y Int8x64) Mask8x64
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x8) Less(y Int16x8) Mask16x8
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x16) Less(y Int16x16) Mask16x16
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x32) Less(y Int16x32) Mask16x32
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x4) Less(y Int32x4) Mask32x4
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x8) Less(y Int32x8) Mask32x8
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x16) Less(y Int32x16) Mask32x16
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x2) Less(y Int64x2) Mask64x2
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x4) Less(y Int64x4) Mask64x4
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x8) Less(y Int64x8) Mask64x8
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Less(y Uint8x16) Mask8x16
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x32) Less(y Uint8x32) Mask8x32
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x64) Less(y Uint8x64) Mask8x64
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Less(y Uint16x8) Mask16x8
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x16) Less(y Uint16x16) Mask16x16
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x32) Less(y Uint16x32) Mask16x32
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Less(y Uint32x4) Mask32x4
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x8) Less(y Uint32x8) Mask32x8
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x16) Less(y Uint32x16) Mask32x16
|
||||
|
||||
// Less compares x and y for less than and returns the result as a Mask64x2.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Less(y Uint64x2) Mask64x2
|
||||
|
||||
// Less compares x and y for less than and returns the result as a Mask64x4.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x4) Less(y Uint64x4) Mask64x4
|
||||
|
||||
// Less compares for less than.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
|
|
@ -4718,121 +4518,41 @@ func (x Float64x4) LessEqual(y Float64x4) Mask64x4
|
|||
// Asm: VCMPPD, CPU Feature: AVX512
|
||||
func (x Float64x8) LessEqual(y Float64x8) Mask64x8
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask8x16.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x16) LessEqual(y Int8x16) Mask8x16
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask8x32.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x32) LessEqual(y Int8x32) Mask8x32
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask8x64.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x64) LessEqual(y Int8x64) Mask8x64
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask16x8.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x8) LessEqual(y Int16x8) Mask16x8
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask16x16.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x16) LessEqual(y Int16x16) Mask16x16
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask16x32.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x32) LessEqual(y Int16x32) Mask16x32
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask32x4.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x4) LessEqual(y Int32x4) Mask32x4
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask32x8.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x8) LessEqual(y Int32x8) Mask32x8
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask32x16.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x16) LessEqual(y Int32x16) Mask32x16
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask64x2.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x2) LessEqual(y Int64x2) Mask64x2
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask64x4.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x4) LessEqual(y Int64x4) Mask64x4
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask64x8.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x8) LessEqual(y Int64x8) Mask64x8
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask8x16.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask8x32.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask8x64.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask16x8.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask16x16.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask16x32.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask32x4.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask32x8.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask32x16.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask64x2.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
|
||||
|
||||
// LessEqual compares x and y for less than or equal and returns the result as a Mask64x4.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
|
||||
|
||||
// LessEqual compares for less than or equal.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
|
|
@ -6644,121 +6364,41 @@ func (x Float64x4) NotEqual(y Float64x4) Mask64x4
|
|||
// Asm: VCMPPD, CPU Feature: AVX512
|
||||
func (x Float64x8) NotEqual(y Float64x8) Mask64x8
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask8x16.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x16) NotEqual(y Int8x16) Mask8x16
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask8x32.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x32) NotEqual(y Int8x32) Mask8x32
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask8x64.
|
||||
//
|
||||
// Asm: VPCMPB, CPU Feature: AVX512
|
||||
func (x Int8x64) NotEqual(y Int8x64) Mask8x64
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask16x8.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x8) NotEqual(y Int16x8) Mask16x8
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask16x16.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x16) NotEqual(y Int16x16) Mask16x16
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask16x32.
|
||||
//
|
||||
// Asm: VPCMPW, CPU Feature: AVX512
|
||||
func (x Int16x32) NotEqual(y Int16x32) Mask16x32
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask32x4.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x4) NotEqual(y Int32x4) Mask32x4
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask32x8.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x8) NotEqual(y Int32x8) Mask32x8
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask32x16.
|
||||
//
|
||||
// Asm: VPCMPD, CPU Feature: AVX512
|
||||
func (x Int32x16) NotEqual(y Int32x16) Mask32x16
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask64x2.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x2) NotEqual(y Int64x2) Mask64x2
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask64x4.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x4) NotEqual(y Int64x4) Mask64x4
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask64x8.
|
||||
//
|
||||
// Asm: VPCMPQ, CPU Feature: AVX512
|
||||
func (x Int64x8) NotEqual(y Int64x8) Mask64x8
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask8x16.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask8x32.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask8x64.
|
||||
//
|
||||
// Asm: VPCMPUB, CPU Feature: AVX512
|
||||
func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask16x8.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask16x16.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask16x32.
|
||||
//
|
||||
// Asm: VPCMPUW, CPU Feature: AVX512
|
||||
func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask32x4.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask32x8.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask32x16.
|
||||
//
|
||||
// Asm: VPCMPUD, CPU Feature: AVX512
|
||||
func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask64x2.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
|
||||
|
||||
// NotEqual compares x and y for inequality and returns the result as a Mask64x4.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
|
||||
|
||||
// NotEqual compares for inequality.
|
||||
//
|
||||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
|
|
|
|||
|
|
@ -1500,6 +1500,642 @@ func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
|
|||
return iy.blendMasked(ix, mask).AsFloat64x8()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int8x16) Less(y Int8x16) Mask8x16 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
|
||||
ones := x.Equal(x).AsInt8x16()
|
||||
return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
|
||||
ones := x.Equal(x).AsInt8x16()
|
||||
return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
|
||||
ones := x.Equal(x).AsInt8x16()
|
||||
return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int16x8) Less(y Int16x8) Mask16x8 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int32x4) Less(y Int32x4) Mask32x4 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int64x2) Less(y Int64x2) Mask64x2 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int8x32) Less(y Int8x32) Mask8x32 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
|
||||
ones := x.Equal(x).AsInt8x32()
|
||||
return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
|
||||
ones := x.Equal(x).AsInt8x32()
|
||||
return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
|
||||
ones := x.Equal(x).AsInt8x32()
|
||||
return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int16x16) Less(y Int16x16) Mask16x16 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int32x8) Less(y Int32x8) Mask32x8 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8()
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int64x4) Less(y Int64x4) Mask64x4 {
|
||||
return y.Greater(x)
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
|
||||
a, b := x.AsInt8x16(), y.AsInt8x16()
|
||||
signs := BroadcastInt8x16(-1 << (8 - 1))
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
|
||||
a, b := x.AsInt8x16(), y.AsInt8x16()
|
||||
signs := BroadcastInt8x16(-1 << (8 - 1))
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
|
||||
a, b := x.AsInt8x16(), y.AsInt8x16()
|
||||
ones := x.Equal(x).AsInt8x16()
|
||||
signs := BroadcastInt8x16(-1 << (8 - 1))
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
|
||||
a, b := x.AsInt8x16(), y.AsInt8x16()
|
||||
ones := x.Equal(x).AsInt8x16()
|
||||
signs := BroadcastInt8x16(-1 << (8 - 1))
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
|
||||
a, b := x.AsInt8x16(), y.AsInt8x16()
|
||||
ones := x.Equal(x).AsInt8x16()
|
||||
return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
|
||||
a, b := x.AsInt16x8(), y.AsInt16x8()
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
|
||||
a, b := x.AsInt16x8(), y.AsInt16x8()
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
|
||||
a, b := x.AsInt16x8(), y.AsInt16x8()
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
|
||||
a, b := x.AsInt16x8(), y.AsInt16x8()
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
|
||||
a, b := x.AsInt16x8(), y.AsInt16x8()
|
||||
ones := x.Equal(x).AsInt16x8()
|
||||
return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
|
||||
a, b := x.AsInt32x4(), y.AsInt32x4()
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
|
||||
a, b := x.AsInt32x4(), y.AsInt32x4()
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
|
||||
a, b := x.AsInt32x4(), y.AsInt32x4()
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
|
||||
a, b := x.AsInt32x4(), y.AsInt32x4()
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
|
||||
a, b := x.AsInt32x4(), y.AsInt32x4()
|
||||
ones := x.Equal(x).AsInt32x4()
|
||||
return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
|
||||
a, b := x.AsInt64x2(), y.AsInt64x2()
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
|
||||
a, b := x.AsInt64x2(), y.AsInt64x2()
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
|
||||
a, b := x.AsInt64x2(), y.AsInt64x2()
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
|
||||
a, b := x.AsInt64x2(), y.AsInt64x2()
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX
|
||||
func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
|
||||
a, b := x.AsInt64x2(), y.AsInt64x2()
|
||||
ones := x.Equal(x).AsInt64x2()
|
||||
return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
|
||||
a, b := x.AsInt8x32(), y.AsInt8x32()
|
||||
signs := BroadcastInt8x32(-1 << (8 - 1))
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
|
||||
a, b := x.AsInt8x32(), y.AsInt8x32()
|
||||
signs := BroadcastInt8x32(-1 << (8 - 1))
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
|
||||
a, b := x.AsInt8x32(), y.AsInt8x32()
|
||||
ones := x.Equal(x).AsInt8x32()
|
||||
signs := BroadcastInt8x32(-1 << (8 - 1))
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
|
||||
a, b := x.AsInt8x32(), y.AsInt8x32()
|
||||
ones := x.Equal(x).AsInt8x32()
|
||||
signs := BroadcastInt8x32(-1 << (8 - 1))
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
|
||||
a, b := x.AsInt8x32(), y.AsInt8x32()
|
||||
ones := x.Equal(x).AsInt8x32()
|
||||
return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
|
||||
a, b := x.AsInt16x16(), y.AsInt16x16()
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
|
||||
a, b := x.AsInt16x16(), y.AsInt16x16()
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
|
||||
a, b := x.AsInt16x16(), y.AsInt16x16()
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
|
||||
a, b := x.AsInt16x16(), y.AsInt16x16()
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
signs := ones.ShiftAllLeft(16 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
|
||||
a, b := x.AsInt16x16(), y.AsInt16x16()
|
||||
ones := x.Equal(x).AsInt16x16()
|
||||
return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
|
||||
a, b := x.AsInt32x8(), y.AsInt32x8()
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
|
||||
a, b := x.AsInt32x8(), y.AsInt32x8()
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
|
||||
a, b := x.AsInt32x8(), y.AsInt32x8()
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
|
||||
a, b := x.AsInt32x8(), y.AsInt32x8()
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
signs := ones.ShiftAllLeft(32 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
|
||||
a, b := x.AsInt32x8(), y.AsInt32x8()
|
||||
ones := x.Equal(x).AsInt32x8()
|
||||
return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8()
|
||||
}
|
||||
|
||||
// Greater returns a mask whose elements indicate whether x > y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
|
||||
a, b := x.AsInt64x4(), y.AsInt64x4()
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs))
|
||||
}
|
||||
|
||||
// Less returns a mask whose elements indicate whether x < y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
|
||||
a, b := x.AsInt64x4(), y.AsInt64x4()
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs))
|
||||
}
|
||||
|
||||
// GreaterEqual returns a mask whose elements indicate whether x >= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
|
||||
a, b := x.AsInt64x4(), y.AsInt64x4()
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
|
||||
}
|
||||
|
||||
// LessEqual returns a mask whose elements indicate whether x <= y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
|
||||
a, b := x.AsInt64x4(), y.AsInt64x4()
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
signs := ones.ShiftAllLeft(64 - 1)
|
||||
return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
|
||||
}
|
||||
|
||||
// NotEqual returns a mask whose elements indicate whether x != y
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
|
||||
a, b := x.AsInt64x4(), y.AsInt64x4()
|
||||
ones := x.Equal(x).AsInt64x4()
|
||||
return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4()
|
||||
}
|
||||
|
||||
// BroadcastInt8x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue