mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add packed saturated u?int conversions
This CL should complete the conversions between int and uint. Change-Id: I46742a62214f346e014a68b9c72a9b116a127f67 Reviewed-on: https://go-review.googlesource.com/c/go/+/698236 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Commit-Queue: David Chase <drchase@google.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
4fa23b0d29
commit
bc217d4170
10 changed files with 397 additions and 0 deletions
|
|
@ -200,6 +200,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPAVGW128,
|
ssa.OpAMD64VPAVGW128,
|
||||||
ssa.OpAMD64VPAVGW256,
|
ssa.OpAMD64VPAVGW256,
|
||||||
ssa.OpAMD64VPAVGW512,
|
ssa.OpAMD64VPAVGW512,
|
||||||
|
ssa.OpAMD64VPACKSSDW128,
|
||||||
|
ssa.OpAMD64VPACKSSDW256,
|
||||||
|
ssa.OpAMD64VPACKSSDW512,
|
||||||
|
ssa.OpAMD64VPACKUSDW128,
|
||||||
|
ssa.OpAMD64VPACKUSDW256,
|
||||||
|
ssa.OpAMD64VPACKUSDW512,
|
||||||
ssa.OpAMD64VPSIGNB128,
|
ssa.OpAMD64VPSIGNB128,
|
||||||
ssa.OpAMD64VPSIGNB256,
|
ssa.OpAMD64VPSIGNB256,
|
||||||
ssa.OpAMD64VPSIGNW128,
|
ssa.OpAMD64VPSIGNW128,
|
||||||
|
|
@ -492,6 +498,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPAVGWMasked128,
|
ssa.OpAMD64VPAVGWMasked128,
|
||||||
ssa.OpAMD64VPAVGWMasked256,
|
ssa.OpAMD64VPAVGWMasked256,
|
||||||
ssa.OpAMD64VPAVGWMasked512,
|
ssa.OpAMD64VPAVGWMasked512,
|
||||||
|
ssa.OpAMD64VPACKSSDWMasked128,
|
||||||
|
ssa.OpAMD64VPACKSSDWMasked256,
|
||||||
|
ssa.OpAMD64VPACKSSDWMasked512,
|
||||||
|
ssa.OpAMD64VPACKUSDWMasked128,
|
||||||
|
ssa.OpAMD64VPACKUSDWMasked256,
|
||||||
|
ssa.OpAMD64VPACKUSDWMasked512,
|
||||||
ssa.OpAMD64VDIVPSMasked128,
|
ssa.OpAMD64VDIVPSMasked128,
|
||||||
ssa.OpAMD64VDIVPSMasked256,
|
ssa.OpAMD64VDIVPSMasked256,
|
||||||
ssa.OpAMD64VDIVPSMasked512,
|
ssa.OpAMD64VDIVPSMasked512,
|
||||||
|
|
@ -1437,6 +1449,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPMOVSDWMasked128,
|
ssa.OpAMD64VPMOVSDWMasked128,
|
||||||
ssa.OpAMD64VPMOVSDWMasked256,
|
ssa.OpAMD64VPMOVSDWMasked256,
|
||||||
ssa.OpAMD64VPMOVSQWMasked128,
|
ssa.OpAMD64VPMOVSQWMasked128,
|
||||||
|
ssa.OpAMD64VPACKSSDWMasked128,
|
||||||
|
ssa.OpAMD64VPACKSSDWMasked256,
|
||||||
|
ssa.OpAMD64VPACKSSDWMasked512,
|
||||||
ssa.OpAMD64VPMOVSXBWMasked128,
|
ssa.OpAMD64VPMOVSXBWMasked128,
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked128,
|
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked256,
|
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||||
|
|
@ -1468,6 +1483,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPMOVUSDWMasked128,
|
ssa.OpAMD64VPMOVUSDWMasked128,
|
||||||
ssa.OpAMD64VPMOVUSDWMasked256,
|
ssa.OpAMD64VPMOVUSDWMasked256,
|
||||||
ssa.OpAMD64VPMOVUSQWMasked128,
|
ssa.OpAMD64VPMOVUSQWMasked128,
|
||||||
|
ssa.OpAMD64VPACKUSDWMasked128,
|
||||||
|
ssa.OpAMD64VPACKUSDWMasked256,
|
||||||
|
ssa.OpAMD64VPACKUSDWMasked512,
|
||||||
ssa.OpAMD64VPMOVZXBWMasked128,
|
ssa.OpAMD64VPMOVZXBWMasked128,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked128,
|
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked256,
|
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||||
|
|
|
||||||
|
|
@ -243,6 +243,9 @@
|
||||||
(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128 ...)
|
(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128 ...)
|
||||||
(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128 ...)
|
(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128 ...)
|
||||||
(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128 ...)
|
(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128 ...)
|
||||||
|
(ConvertToInt16SaturatedPackedInt32x4 ...) => (VPACKSSDW128 ...)
|
||||||
|
(ConvertToInt16SaturatedPackedInt32x8 ...) => (VPACKSSDW256 ...)
|
||||||
|
(ConvertToInt16SaturatedPackedInt32x16 ...) => (VPACKSSDW512 ...)
|
||||||
(ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
|
(ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
|
||||||
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
|
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
|
||||||
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
|
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
|
||||||
|
|
@ -299,6 +302,9 @@
|
||||||
(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128 ...)
|
(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128 ...)
|
||||||
(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128 ...)
|
(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128 ...)
|
||||||
(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128 ...)
|
(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128 ...)
|
||||||
|
(ConvertToUint16SaturatedPackedUint32x4 ...) => (VPACKUSDW128 ...)
|
||||||
|
(ConvertToUint16SaturatedPackedUint32x8 ...) => (VPACKUSDW256 ...)
|
||||||
|
(ConvertToUint16SaturatedPackedUint32x16 ...) => (VPACKUSDW512 ...)
|
||||||
(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
|
(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
|
||||||
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
|
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
|
||||||
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
|
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
|
||||||
|
|
@ -1244,6 +1250,7 @@
|
||||||
(VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512 [a] x mask)
|
(VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512 [a] x mask)
|
||||||
(VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
|
(VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
|
||||||
(VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask)
|
(VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask)
|
||||||
|
(VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask)
|
||||||
(VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512 x mask)
|
(VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512 x mask)
|
||||||
(VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask)
|
(VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask)
|
||||||
(VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask)
|
(VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask)
|
||||||
|
|
@ -1251,6 +1258,7 @@
|
||||||
(VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask)
|
(VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask)
|
||||||
(VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask)
|
(VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask)
|
||||||
(VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask)
|
(VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask)
|
||||||
|
(VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask)
|
||||||
(VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512 x mask)
|
(VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512 x mask)
|
||||||
(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask)
|
(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask)
|
||||||
(VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask)
|
(VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask)
|
||||||
|
|
|
||||||
|
|
@ -182,6 +182,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPABSWMasked128", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VPABSWMasked128", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPABSWMasked256", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPABSWMasked256", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPABSWMasked512", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VPABSWMasked512", argLength: 2, reg: wkw, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPACKSSDW128", argLength: 2, reg: v21, asm: "VPACKSSDW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPACKSSDW256", argLength: 2, reg: v21, asm: "VPACKSSDW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPACKSSDW512", argLength: 2, reg: w21, asm: "VPACKSSDW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPACKSSDWMasked128", argLength: 3, reg: w2kw, asm: "VPACKSSDW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPACKSSDWMasked256", argLength: 3, reg: w2kw, asm: "VPACKSSDW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPACKSSDWMasked512", argLength: 3, reg: w2kw, asm: "VPACKSSDW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPACKUSDW128", argLength: 2, reg: v21, asm: "VPACKUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPACKUSDW256", argLength: 2, reg: v21, asm: "VPACKUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPACKUSDW512", argLength: 2, reg: w21, asm: "VPACKUSDW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPACKUSDWMasked128", argLength: 3, reg: w2kw, asm: "VPACKUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPACKUSDWMasked256", argLength: 3, reg: w2kw, asm: "VPACKUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPACKUSDWMasked512", argLength: 3, reg: w2kw, asm: "VPACKUSDW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPADDB128", argLength: 2, reg: v21, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPADDB128", argLength: 2, reg: v21, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPADDB256", argLength: 2, reg: v21, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPADDB256", argLength: 2, reg: v21, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPADDB512", argLength: 2, reg: w21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPADDB512", argLength: 2, reg: w21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
|
|
|
||||||
|
|
@ -235,6 +235,9 @@ func simdGenericOps() []opData {
|
||||||
{name: "ConvertToInt16SaturatedInt64x2", argLength: 1, commutative: false},
|
{name: "ConvertToInt16SaturatedInt64x2", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt16SaturatedInt64x4", argLength: 1, commutative: false},
|
{name: "ConvertToInt16SaturatedInt64x4", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt16SaturatedInt64x8", argLength: 1, commutative: false},
|
{name: "ConvertToInt16SaturatedInt64x8", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToInt16SaturatedPackedInt32x4", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToInt16SaturatedPackedInt32x8", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToInt16SaturatedPackedInt32x16", argLength: 2, commutative: false},
|
||||||
{name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false},
|
{name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
|
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
|
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
|
||||||
|
|
@ -277,6 +280,9 @@ func simdGenericOps() []opData {
|
||||||
{name: "ConvertToUint8Uint64x2", argLength: 1, commutative: false},
|
{name: "ConvertToUint8Uint64x2", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint8Uint64x4", argLength: 1, commutative: false},
|
{name: "ConvertToUint8Uint64x4", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint8Uint64x8", argLength: 1, commutative: false},
|
{name: "ConvertToUint8Uint64x8", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint16SaturatedPackedUint32x4", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToUint16SaturatedPackedUint32x8", argLength: 2, commutative: false},
|
||||||
|
{name: "ConvertToUint16SaturatedPackedUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "ConvertToUint16SaturatedUint32x4", argLength: 1, commutative: false},
|
{name: "ConvertToUint16SaturatedUint32x4", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint16SaturatedUint32x8", argLength: 1, commutative: false},
|
{name: "ConvertToUint16SaturatedUint32x8", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint16SaturatedUint32x16", argLength: 1, commutative: false},
|
{name: "ConvertToUint16SaturatedUint32x16", argLength: 1, commutative: false},
|
||||||
|
|
|
||||||
|
|
@ -1405,6 +1405,18 @@ const (
|
||||||
OpAMD64VPABSWMasked128
|
OpAMD64VPABSWMasked128
|
||||||
OpAMD64VPABSWMasked256
|
OpAMD64VPABSWMasked256
|
||||||
OpAMD64VPABSWMasked512
|
OpAMD64VPABSWMasked512
|
||||||
|
OpAMD64VPACKSSDW128
|
||||||
|
OpAMD64VPACKSSDW256
|
||||||
|
OpAMD64VPACKSSDW512
|
||||||
|
OpAMD64VPACKSSDWMasked128
|
||||||
|
OpAMD64VPACKSSDWMasked256
|
||||||
|
OpAMD64VPACKSSDWMasked512
|
||||||
|
OpAMD64VPACKUSDW128
|
||||||
|
OpAMD64VPACKUSDW256
|
||||||
|
OpAMD64VPACKUSDW512
|
||||||
|
OpAMD64VPACKUSDWMasked128
|
||||||
|
OpAMD64VPACKUSDWMasked256
|
||||||
|
OpAMD64VPACKUSDWMasked512
|
||||||
OpAMD64VPADDB128
|
OpAMD64VPADDB128
|
||||||
OpAMD64VPADDB256
|
OpAMD64VPADDB256
|
||||||
OpAMD64VPADDB512
|
OpAMD64VPADDB512
|
||||||
|
|
@ -5002,6 +5014,9 @@ const (
|
||||||
OpConvertToInt16SaturatedInt64x2
|
OpConvertToInt16SaturatedInt64x2
|
||||||
OpConvertToInt16SaturatedInt64x4
|
OpConvertToInt16SaturatedInt64x4
|
||||||
OpConvertToInt16SaturatedInt64x8
|
OpConvertToInt16SaturatedInt64x8
|
||||||
|
OpConvertToInt16SaturatedPackedInt32x4
|
||||||
|
OpConvertToInt16SaturatedPackedInt32x8
|
||||||
|
OpConvertToInt16SaturatedPackedInt32x16
|
||||||
OpConvertToInt16x8Int8x16
|
OpConvertToInt16x8Int8x16
|
||||||
OpConvertToInt32Float32x4
|
OpConvertToInt32Float32x4
|
||||||
OpConvertToInt32Float32x8
|
OpConvertToInt32Float32x8
|
||||||
|
|
@ -5044,6 +5059,9 @@ const (
|
||||||
OpConvertToUint8Uint64x2
|
OpConvertToUint8Uint64x2
|
||||||
OpConvertToUint8Uint64x4
|
OpConvertToUint8Uint64x4
|
||||||
OpConvertToUint8Uint64x8
|
OpConvertToUint8Uint64x8
|
||||||
|
OpConvertToUint16SaturatedPackedUint32x4
|
||||||
|
OpConvertToUint16SaturatedPackedUint32x8
|
||||||
|
OpConvertToUint16SaturatedPackedUint32x16
|
||||||
OpConvertToUint16SaturatedUint32x4
|
OpConvertToUint16SaturatedUint32x4
|
||||||
OpConvertToUint16SaturatedUint32x8
|
OpConvertToUint16SaturatedUint32x8
|
||||||
OpConvertToUint16SaturatedUint32x16
|
OpConvertToUint16SaturatedUint32x16
|
||||||
|
|
@ -21608,6 +21626,180 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKSSDW128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPACKSSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKSSDW256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPACKSSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKSSDW512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPACKSSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKSSDWMasked128",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPACKSSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKSSDWMasked256",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPACKSSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKSSDWMasked512",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPACKSSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKUSDW128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPACKUSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKUSDW256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPACKUSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKUSDW512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPACKUSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKUSDWMasked128",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPACKUSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKUSDWMasked256",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPACKUSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPACKUSDWMasked512",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPACKUSDW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPADDB128",
|
name: "VPADDB128",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -66238,6 +66430,21 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt16SaturatedPackedInt32x4",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt16SaturatedPackedInt32x8",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToInt16SaturatedPackedInt32x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "ConvertToInt16x8Int8x16",
|
name: "ConvertToInt16x8Int8x16",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -66448,6 +66655,21 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint16SaturatedPackedUint32x4",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint16SaturatedPackedUint32x8",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint16SaturatedPackedUint32x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "ConvertToUint16SaturatedUint32x4",
|
name: "ConvertToUint16SaturatedUint32x4",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
|
||||||
|
|
@ -1412,6 +1412,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpConvertToInt16SaturatedInt64x8:
|
case OpConvertToInt16SaturatedInt64x8:
|
||||||
v.Op = OpAMD64VPMOVSQW128
|
v.Op = OpAMD64VPMOVSQW128
|
||||||
return true
|
return true
|
||||||
|
case OpConvertToInt16SaturatedPackedInt32x16:
|
||||||
|
v.Op = OpAMD64VPACKSSDW512
|
||||||
|
return true
|
||||||
|
case OpConvertToInt16SaturatedPackedInt32x4:
|
||||||
|
v.Op = OpAMD64VPACKSSDW128
|
||||||
|
return true
|
||||||
|
case OpConvertToInt16SaturatedPackedInt32x8:
|
||||||
|
v.Op = OpAMD64VPACKSSDW256
|
||||||
|
return true
|
||||||
case OpConvertToInt16x8Int8x16:
|
case OpConvertToInt16x8Int8x16:
|
||||||
v.Op = OpAMD64VPMOVSXBW128
|
v.Op = OpAMD64VPMOVSXBW128
|
||||||
return true
|
return true
|
||||||
|
|
@ -1538,6 +1547,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpConvertToInt8SaturatedInt64x8:
|
case OpConvertToInt8SaturatedInt64x8:
|
||||||
v.Op = OpAMD64VPMOVSQB128
|
v.Op = OpAMD64VPMOVSQB128
|
||||||
return true
|
return true
|
||||||
|
case OpConvertToUint16SaturatedPackedUint32x16:
|
||||||
|
v.Op = OpAMD64VPACKUSDW512
|
||||||
|
return true
|
||||||
|
case OpConvertToUint16SaturatedPackedUint32x4:
|
||||||
|
v.Op = OpAMD64VPACKUSDW128
|
||||||
|
return true
|
||||||
|
case OpConvertToUint16SaturatedPackedUint32x8:
|
||||||
|
v.Op = OpAMD64VPACKUSDW256
|
||||||
|
return true
|
||||||
case OpConvertToUint16SaturatedUint32x16:
|
case OpConvertToUint16SaturatedUint32x16:
|
||||||
v.Op = OpAMD64VPMOVUSDW256
|
v.Op = OpAMD64VPMOVUSDW256
|
||||||
return true
|
return true
|
||||||
|
|
@ -27007,6 +27025,19 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
||||||
v.AddArg2(x, mask)
|
v.AddArg2(x, mask)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask)
|
||||||
|
// result: (VPACKSSDWMasked512 x y mask)
|
||||||
|
for {
|
||||||
|
if v_0.Op != OpAMD64VPACKSSDW512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
y := v_0.Args[1]
|
||||||
|
x := v_0.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VPACKSSDWMasked512)
|
||||||
|
v.AddArg3(x, y, mask)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask)
|
// match: (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask)
|
||||||
// result: (VCVTTPS2DQMasked512 x mask)
|
// result: (VCVTTPS2DQMasked512 x mask)
|
||||||
for {
|
for {
|
||||||
|
|
@ -27031,6 +27062,19 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
||||||
v.AddArg2(x, mask)
|
v.AddArg2(x, mask)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask)
|
||||||
|
// result: (VPACKUSDWMasked512 x y mask)
|
||||||
|
for {
|
||||||
|
if v_0.Op != OpAMD64VPACKUSDW512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
y := v_0.Args[1]
|
||||||
|
x := v_0.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
v.reset(OpAMD64VPACKUSDWMasked512)
|
||||||
|
v.AddArg3(x, y, mask)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask)
|
// match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask)
|
||||||
// result: (VCVTPS2UDQMasked512 x mask)
|
// result: (VCVTPS2UDQMasked512 x mask)
|
||||||
for {
|
for {
|
||||||
|
|
|
||||||
|
|
@ -255,6 +255,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Int64x2.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x2, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int64x2.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x2, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int64x4.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int64x4.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int64x8.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int64x8.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x8, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x4.ConvertToInt16SaturatedPacked", opLen2(ssa.OpConvertToInt16SaturatedPackedInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x8.ConvertToInt16SaturatedPacked", opLen2(ssa.OpConvertToInt16SaturatedPackedInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x16.ConvertToInt16SaturatedPacked", opLen2(ssa.OpConvertToInt16SaturatedPackedInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
|
@ -311,6 +314,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint64x2.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x2, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint64x2.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x2, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x4.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint64x4.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint64x8.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x8, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x4.ConvertToUint16SaturatedPacked", opLen2(ssa.OpConvertToUint16SaturatedPackedUint32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x8.ConvertToUint16SaturatedPacked", opLen2(ssa.OpConvertToUint16SaturatedPackedUint32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x16.ConvertToUint16SaturatedPacked", opLen2(ssa.OpConvertToUint16SaturatedPackedUint32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,14 @@
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME converts element values to uint32 with saturation.
|
// NAME converts element values to uint32 with saturation.
|
||||||
|
- go: ConvertToInt16SaturatedPacked
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME converts element values to int16 with saturation.
|
||||||
|
- go: ConvertToUint16SaturatedPacked
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME converts element values to uint16 with saturation.
|
||||||
|
|
||||||
# low-part only conversions
|
# low-part only conversions
|
||||||
# int<->int or uint<->uint widening conversions.
|
# int<->int or uint<->uint widening conversions.
|
||||||
|
|
|
||||||
|
|
@ -280,6 +280,27 @@
|
||||||
- base: uint
|
- base: uint
|
||||||
out:
|
out:
|
||||||
- base: uint
|
- base: uint
|
||||||
|
# Truncating saturated packed
|
||||||
|
- go: ConvertToInt16SaturatedPacked
|
||||||
|
asm: "VPACKSSDW"
|
||||||
|
addDoc: &satDocPacked
|
||||||
|
!string |-
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
in:
|
||||||
|
- base: int
|
||||||
|
- base: int
|
||||||
|
out:
|
||||||
|
- base: int
|
||||||
|
- go: ConvertToUint16SaturatedPacked
|
||||||
|
asm: "VPACKUSDW"
|
||||||
|
addDoc: *satDocPacked
|
||||||
|
in:
|
||||||
|
- base: uint
|
||||||
|
- base: uint
|
||||||
|
out:
|
||||||
|
- base: uint
|
||||||
|
|
||||||
# low-part only conversions.
|
# low-part only conversions.
|
||||||
# uint8->uint16
|
# uint8->uint16
|
||||||
|
|
|
||||||
|
|
@ -1408,6 +1408,32 @@ func (x Int64x4) ConvertToInt16Saturated() Int16x8
|
||||||
// Asm: VPMOVSQW, CPU Feature: AVX512
|
// Asm: VPMOVSQW, CPU Feature: AVX512
|
||||||
func (x Int64x8) ConvertToInt16Saturated() Int16x8
|
func (x Int64x8) ConvertToInt16Saturated() Int16x8
|
||||||
|
|
||||||
|
/* ConvertToInt16SaturatedPacked */
|
||||||
|
|
||||||
|
// ConvertToInt16SaturatedPacked converts element values to int16 with saturation.
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
//
|
||||||
|
// Asm: VPACKSSDW, CPU Feature: AVX
|
||||||
|
func (x Int32x4) ConvertToInt16SaturatedPacked(y Int32x4) Int16x8
|
||||||
|
|
||||||
|
// ConvertToInt16SaturatedPacked converts element values to int16 with saturation.
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
//
|
||||||
|
// Asm: VPACKSSDW, CPU Feature: AVX2
|
||||||
|
func (x Int32x8) ConvertToInt16SaturatedPacked(y Int32x8) Int16x16
|
||||||
|
|
||||||
|
// ConvertToInt16SaturatedPacked converts element values to int16 with saturation.
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
//
|
||||||
|
// Asm: VPACKSSDW, CPU Feature: AVX512
|
||||||
|
func (x Int32x16) ConvertToInt16SaturatedPacked(y Int32x16) Int16x32
|
||||||
|
|
||||||
/* ConvertToInt16x8 */
|
/* ConvertToInt16x8 */
|
||||||
|
|
||||||
// ConvertToInt16x8 converts 8 lowest vector element values to int16.
|
// ConvertToInt16x8 converts 8 lowest vector element values to int16.
|
||||||
|
|
@ -1768,6 +1794,32 @@ func (x Uint64x4) ConvertToUint16Saturated() Uint16x8
|
||||||
// Asm: VPMOVUSQW, CPU Feature: AVX512
|
// Asm: VPMOVUSQW, CPU Feature: AVX512
|
||||||
func (x Uint64x8) ConvertToUint16Saturated() Uint16x8
|
func (x Uint64x8) ConvertToUint16Saturated() Uint16x8
|
||||||
|
|
||||||
|
/* ConvertToUint16SaturatedPacked */
|
||||||
|
|
||||||
|
// ConvertToUint16SaturatedPacked converts element values to uint16 with saturation.
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
//
|
||||||
|
// Asm: VPACKUSDW, CPU Feature: AVX
|
||||||
|
func (x Uint32x4) ConvertToUint16SaturatedPacked(y Uint32x4) Uint16x8
|
||||||
|
|
||||||
|
// ConvertToUint16SaturatedPacked converts element values to uint16 with saturation.
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
//
|
||||||
|
// Asm: VPACKUSDW, CPU Feature: AVX2
|
||||||
|
func (x Uint32x8) ConvertToUint16SaturatedPacked(y Uint32x8) Uint16x16
|
||||||
|
|
||||||
|
// ConvertToUint16SaturatedPacked converts element values to uint16 with saturation.
|
||||||
|
// With each 128-bit as a group:
|
||||||
|
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||||
|
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||||
|
//
|
||||||
|
// Asm: VPACKUSDW, CPU Feature: AVX512
|
||||||
|
func (x Uint32x16) ConvertToUint16SaturatedPacked(y Uint32x16) Uint16x32
|
||||||
|
|
||||||
/* ConvertToUint16x8 */
|
/* ConvertToUint16x8 */
|
||||||
|
|
||||||
// ConvertToUint16x8 converts 8 lowest vector element values to uint16.
|
// ConvertToUint16x8 converts 8 lowest vector element values to uint16.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue