mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: generated simd code to add some conversions
Generated by arch/internal/simdgen CL 689735. A small number of conversions for testing purposes. Change-Id: I4d52c643d08c02794c3fea9778bb1ecbb5507de4 Reviewed-on: https://go-review.googlesource.com/c/go/+/689716 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
e62e377ed6
commit
ec5c20ba5a
8 changed files with 518 additions and 0 deletions
|
|
@ -36,6 +36,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VRSQRT14PD128,
|
ssa.OpAMD64VRSQRT14PD128,
|
||||||
ssa.OpAMD64VRSQRT14PD256,
|
ssa.OpAMD64VRSQRT14PD256,
|
||||||
ssa.OpAMD64VRSQRT14PD512,
|
ssa.OpAMD64VRSQRT14PD512,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQ128,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQ256,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQ512,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQ128,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQ256,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQ512,
|
||||||
ssa.OpAMD64VPOPCNTB128,
|
ssa.OpAMD64VPOPCNTB128,
|
||||||
ssa.OpAMD64VPOPCNTB256,
|
ssa.OpAMD64VPOPCNTB256,
|
||||||
ssa.OpAMD64VPOPCNTB512,
|
ssa.OpAMD64VPOPCNTB512,
|
||||||
|
|
@ -628,6 +634,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked128,
|
ssa.OpAMD64VPCOMPRESSQMasked128,
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked256,
|
ssa.OpAMD64VPCOMPRESSQMasked256,
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked512,
|
ssa.OpAMD64VPCOMPRESSQMasked512,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQMasked512,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||||
ssa.OpAMD64VPOPCNTBMasked128,
|
ssa.OpAMD64VPOPCNTBMasked128,
|
||||||
ssa.OpAMD64VPOPCNTBMasked256,
|
ssa.OpAMD64VPOPCNTBMasked256,
|
||||||
ssa.OpAMD64VPOPCNTBMasked512,
|
ssa.OpAMD64VPOPCNTBMasked512,
|
||||||
|
|
@ -1124,6 +1136,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked128,
|
ssa.OpAMD64VPCOMPRESSQMasked128,
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked256,
|
ssa.OpAMD64VPCOMPRESSQMasked256,
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked512,
|
ssa.OpAMD64VPCOMPRESSQMasked512,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||||
|
ssa.OpAMD64VCVTTPS2DQMasked512,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||||
|
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||||
ssa.OpAMD64VREDUCEPSMasked128,
|
ssa.OpAMD64VREDUCEPSMasked128,
|
||||||
ssa.OpAMD64VREDUCEPSMasked256,
|
ssa.OpAMD64VREDUCEPSMasked256,
|
||||||
ssa.OpAMD64VREDUCEPSMasked512,
|
ssa.OpAMD64VREDUCEPSMasked512,
|
||||||
|
|
|
||||||
|
|
@ -234,6 +234,18 @@
|
||||||
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||||
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||||
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||||
|
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
|
||||||
|
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
|
||||||
|
(ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
|
||||||
|
(ConvertToInt32MaskedFloat32x4 x mask) => (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
|
(ConvertToInt32MaskedFloat32x8 x mask) => (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
|
(ConvertToInt32MaskedFloat32x16 x mask) => (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
|
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
|
||||||
|
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
|
||||||
|
(ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
|
||||||
|
(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
|
(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
|
(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
|
(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
|
||||||
(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
|
(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
|
||||||
(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
|
(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VCOMPRESSPSMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VCOMPRESSPSMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VCOMPRESSPSMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VCOMPRESSPSMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VCOMPRESSPSMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VCOMPRESSPSMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPS", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VCVTPS2UDQ128", argLength: 1, reg: w11, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VCVTPS2UDQ256", argLength: 1, reg: w11, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VCVTPS2UDQ512", argLength: 1, reg: w11, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VCVTPS2UDQMasked128", argLength: 2, reg: wkw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VCVTPS2UDQMasked256", argLength: 2, reg: wkw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VCVTPS2UDQMasked512", argLength: 2, reg: wkw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VCVTTPS2DQ128", argLength: 1, reg: v11, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VCVTTPS2DQ256", argLength: 1, reg: v11, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VCVTTPS2DQ512", argLength: 1, reg: w11, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VCVTTPS2DQMasked128", argLength: 2, reg: wkw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VCVTTPS2DQMasked256", argLength: 2, reg: wkw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VCVTTPS2DQMasked512", argLength: 2, reg: wkw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VDIVPD128", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VDIVPD128", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VDIVPD256", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VDIVPD256", argLength: 2, reg: v21, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VDIVPD512", argLength: 2, reg: w21, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VDIVPD512", argLength: 2, reg: w21, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
|
|
||||||
|
|
@ -225,6 +225,18 @@ func simdGenericOps() []opData {
|
||||||
{name: "CompressUint64x2", argLength: 2, commutative: false},
|
{name: "CompressUint64x2", argLength: 2, commutative: false},
|
||||||
{name: "CompressUint64x4", argLength: 2, commutative: false},
|
{name: "CompressUint64x4", argLength: 2, commutative: false},
|
||||||
{name: "CompressUint64x8", argLength: 2, commutative: false},
|
{name: "CompressUint64x8", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToInt32MaskedFloat32x4", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToInt32MaskedFloat32x8", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToInt32MaskedFloat32x16", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false},
|
||||||
{name: "DivFloat32x4", argLength: 2, commutative: false},
|
{name: "DivFloat32x4", argLength: 2, commutative: false},
|
||||||
{name: "DivFloat32x8", argLength: 2, commutative: false},
|
{name: "DivFloat32x8", argLength: 2, commutative: false},
|
||||||
{name: "DivFloat32x16", argLength: 2, commutative: false},
|
{name: "DivFloat32x16", argLength: 2, commutative: false},
|
||||||
|
|
|
||||||
|
|
@ -1230,6 +1230,18 @@ const (
|
||||||
OpAMD64VCOMPRESSPSMasked128
|
OpAMD64VCOMPRESSPSMasked128
|
||||||
OpAMD64VCOMPRESSPSMasked256
|
OpAMD64VCOMPRESSPSMasked256
|
||||||
OpAMD64VCOMPRESSPSMasked512
|
OpAMD64VCOMPRESSPSMasked512
|
||||||
|
OpAMD64VCVTPS2UDQ128
|
||||||
|
OpAMD64VCVTPS2UDQ256
|
||||||
|
OpAMD64VCVTPS2UDQ512
|
||||||
|
OpAMD64VCVTPS2UDQMasked128
|
||||||
|
OpAMD64VCVTPS2UDQMasked256
|
||||||
|
OpAMD64VCVTPS2UDQMasked512
|
||||||
|
OpAMD64VCVTTPS2DQ128
|
||||||
|
OpAMD64VCVTTPS2DQ256
|
||||||
|
OpAMD64VCVTTPS2DQ512
|
||||||
|
OpAMD64VCVTTPS2DQMasked128
|
||||||
|
OpAMD64VCVTTPS2DQMasked256
|
||||||
|
OpAMD64VCVTTPS2DQMasked512
|
||||||
OpAMD64VDIVPD128
|
OpAMD64VDIVPD128
|
||||||
OpAMD64VDIVPD256
|
OpAMD64VDIVPD256
|
||||||
OpAMD64VDIVPD512
|
OpAMD64VDIVPD512
|
||||||
|
|
@ -4671,6 +4683,18 @@ const (
|
||||||
OpCompressUint64x2
|
OpCompressUint64x2
|
||||||
OpCompressUint64x4
|
OpCompressUint64x4
|
||||||
OpCompressUint64x8
|
OpCompressUint64x8
|
||||||
|
OpConvertToInt32Float32x4
|
||||||
|
OpConvertToInt32Float32x8
|
||||||
|
OpConvertToInt32Float32x16
|
||||||
|
OpConvertToInt32MaskedFloat32x4
|
||||||
|
OpConvertToInt32MaskedFloat32x8
|
||||||
|
OpConvertToInt32MaskedFloat32x16
|
||||||
|
OpConvertToUint32Float32x4
|
||||||
|
OpConvertToUint32Float32x8
|
||||||
|
OpConvertToUint32Float32x16
|
||||||
|
OpConvertToUint32MaskedFloat32x4
|
||||||
|
OpConvertToUint32MaskedFloat32x8
|
||||||
|
OpConvertToUint32MaskedFloat32x16
|
||||||
OpDivFloat32x4
|
OpDivFloat32x4
|
||||||
OpDivFloat32x8
|
OpDivFloat32x8
|
||||||
OpDivFloat32x16
|
OpDivFloat32x16
|
||||||
|
|
@ -19331,6 +19355,168 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTPS2UDQ128",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVCVTPS2UDQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTPS2UDQ256",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVCVTPS2UDQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTPS2UDQ512",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVCVTPS2UDQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTPS2UDQMasked128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVCVTPS2UDQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTPS2UDQMasked256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVCVTPS2UDQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTPS2UDQMasked512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVCVTPS2UDQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTTPS2DQ128",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVCVTTPS2DQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTTPS2DQ256",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVCVTTPS2DQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTTPS2DQ512",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVCVTTPS2DQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTTPS2DQMasked128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVCVTTPS2DQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTTPS2DQMasked256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVCVTTPS2DQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VCVTTPS2DQMasked512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVCVTTPS2DQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VDIVPD128",
|
name: "VDIVPD128",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -62407,6 +62593,66 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt32Float32x4",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt32Float32x8",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt32Float32x16",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt32MaskedFloat32x4",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt32MaskedFloat32x8",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt32MaskedFloat32x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32Float32x4",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32Float32x8",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32Float32x16",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32MaskedFloat32x4",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32MaskedFloat32x8",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32MaskedFloat32x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "DivFloat32x4",
|
name: "DivFloat32x4",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
|
||||||
|
|
@ -1267,6 +1267,36 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpConstBool(v)
|
return rewriteValueAMD64_OpConstBool(v)
|
||||||
case OpConstNil:
|
case OpConstNil:
|
||||||
return rewriteValueAMD64_OpConstNil(v)
|
return rewriteValueAMD64_OpConstNil(v)
|
||||||
|
case OpConvertToInt32Float32x16:
|
||||||
|
v.Op = OpAMD64VCVTTPS2DQ512
|
||||||
|
return true
|
||||||
|
case OpConvertToInt32Float32x4:
|
||||||
|
v.Op = OpAMD64VCVTTPS2DQ128
|
||||||
|
return true
|
||||||
|
case OpConvertToInt32Float32x8:
|
||||||
|
v.Op = OpAMD64VCVTTPS2DQ256
|
||||||
|
return true
|
||||||
|
case OpConvertToInt32MaskedFloat32x16:
|
||||||
|
return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v)
|
||||||
|
case OpConvertToInt32MaskedFloat32x4:
|
||||||
|
return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v)
|
||||||
|
case OpConvertToInt32MaskedFloat32x8:
|
||||||
|
return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v)
|
||||||
|
case OpConvertToUint32Float32x16:
|
||||||
|
v.Op = OpAMD64VCVTPS2UDQ512
|
||||||
|
return true
|
||||||
|
case OpConvertToUint32Float32x4:
|
||||||
|
v.Op = OpAMD64VCVTPS2UDQ128
|
||||||
|
return true
|
||||||
|
case OpConvertToUint32Float32x8:
|
||||||
|
v.Op = OpAMD64VCVTPS2UDQ256
|
||||||
|
return true
|
||||||
|
case OpConvertToUint32MaskedFloat32x16:
|
||||||
|
return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v)
|
||||||
|
case OpConvertToUint32MaskedFloat32x4:
|
||||||
|
return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v)
|
||||||
|
case OpConvertToUint32MaskedFloat32x8:
|
||||||
|
return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v)
|
||||||
case OpCtz16:
|
case OpCtz16:
|
||||||
return rewriteValueAMD64_OpCtz16(v)
|
return rewriteValueAMD64_OpCtz16(v)
|
||||||
case OpCtz16NonZero:
|
case OpCtz16NonZero:
|
||||||
|
|
@ -31928,6 +31958,102 @@ func rewriteValueAMD64_OpConstNil(v *Value) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (ConvertToInt32MaskedFloat32x16 x mask)
|
||||||
|
// result: (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VCVTTPS2DQMasked512)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg2(x, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (ConvertToInt32MaskedFloat32x4 x mask)
|
||||||
|
// result: (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VCVTTPS2DQMasked128)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg2(x, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (ConvertToInt32MaskedFloat32x8 x mask)
|
||||||
|
// result: (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VCVTTPS2DQMasked256)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg2(x, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (ConvertToUint32MaskedFloat32x16 x mask)
|
||||||
|
// result: (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VCVTPS2UDQMasked512)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg2(x, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (ConvertToUint32MaskedFloat32x4 x mask)
|
||||||
|
// result: (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VCVTPS2UDQMasked128)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg2(x, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (ConvertToUint32MaskedFloat32x8 x mask)
|
||||||
|
// result: (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VCVTPS2UDQMasked256)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg2(x, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueAMD64_OpCtz16(v *Value) bool {
|
func rewriteValueAMD64_OpCtz16(v *Value) bool {
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
b := v.Block
|
b := v.Block
|
||||||
|
|
|
||||||
|
|
@ -245,6 +245,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x4.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x8.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x16.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -1446,6 +1446,86 @@ func (x Uint64x4) Compress(mask Mask64x4) Uint64x4
|
||||||
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
|
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
|
||||||
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
|
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
|
||||||
|
|
||||||
|
/* ConvertToInt32 */
|
||||||
|
|
||||||
|
// ConvertToInt32 converts element values to int32.
|
||||||
|
//
|
||||||
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX
|
||||||
|
func (x Float32x4) ConvertToInt32() Int32x4
|
||||||
|
|
||||||
|
// ConvertToInt32 converts element values to int32.
|
||||||
|
//
|
||||||
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX
|
||||||
|
func (x Float32x8) ConvertToInt32() Int32x8
|
||||||
|
|
||||||
|
// ConvertToInt32 converts element values to int32.
|
||||||
|
//
|
||||||
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x16) ConvertToInt32() Int32x16
|
||||||
|
|
||||||
|
/* ConvertToInt32Masked */
|
||||||
|
|
||||||
|
// ConvertToInt32Masked converts element values to int32.
|
||||||
|
//
|
||||||
|
// This operation is applied selectively under a write mask.
|
||||||
|
//
|
||||||
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x4) ConvertToInt32Masked(mask Mask32x4) Int32x4
|
||||||
|
|
||||||
|
// ConvertToInt32Masked converts element values to int32.
|
||||||
|
//
|
||||||
|
// This operation is applied selectively under a write mask.
|
||||||
|
//
|
||||||
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x8) ConvertToInt32Masked(mask Mask32x8) Int32x8
|
||||||
|
|
||||||
|
// ConvertToInt32Masked converts element values to int32.
|
||||||
|
//
|
||||||
|
// This operation is applied selectively under a write mask.
|
||||||
|
//
|
||||||
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x16) ConvertToInt32Masked(mask Mask32x16) Int32x16
|
||||||
|
|
||||||
|
/* ConvertToUint32 */
|
||||||
|
|
||||||
|
// ConvertToUint32 converts element values to uint32.
|
||||||
|
//
|
||||||
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x4) ConvertToUint32() Uint32x4
|
||||||
|
|
||||||
|
// ConvertToUint32 converts element values to uint32.
|
||||||
|
//
|
||||||
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x8) ConvertToUint32() Uint32x8
|
||||||
|
|
||||||
|
// ConvertToUint32 converts element values to uint32.
|
||||||
|
//
|
||||||
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x16) ConvertToUint32() Uint32x16
|
||||||
|
|
||||||
|
/* ConvertToUint32Masked */
|
||||||
|
|
||||||
|
// ConvertToUint32Masked converts element values to uint32.
|
||||||
|
//
|
||||||
|
// This operation is applied selectively under a write mask.
|
||||||
|
//
|
||||||
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x4) ConvertToUint32Masked(mask Mask32x4) Uint32x4
|
||||||
|
|
||||||
|
// ConvertToUint32Masked converts element values to uint32.
|
||||||
|
//
|
||||||
|
// This operation is applied selectively under a write mask.
|
||||||
|
//
|
||||||
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x8) ConvertToUint32Masked(mask Mask32x8) Uint32x8
|
||||||
|
|
||||||
|
// ConvertToUint32Masked converts element values to uint32.
|
||||||
|
//
|
||||||
|
// This operation is applied selectively under a write mask.
|
||||||
|
//
|
||||||
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
|
||||||
|
func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16
|
||||||
|
|
||||||
/* DiffWithCeilWithPrecision */
|
/* DiffWithCeilWithPrecision */
|
||||||
|
|
||||||
// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
|
// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue