mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: complete u?int widening conversions
Change-Id: I21da09261b6b278768d99229fe2db387aef1e812
Reviewed-on: https://go-review.googlesource.com/c/go/+/697915
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
6af8881adb
commit
f4c41d9922
10 changed files with 1993 additions and 14 deletions
|
|
@ -41,18 +41,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPBROADCASTW512,
|
||||
ssa.OpAMD64VPBROADCASTD512,
|
||||
ssa.OpAMD64VPBROADCASTQ512,
|
||||
ssa.OpAMD64VPMOVSXBW256,
|
||||
ssa.OpAMD64VPMOVSXBW512,
|
||||
ssa.OpAMD64VPMOVSXBW128,
|
||||
ssa.OpAMD64VCVTTPS2DQ128,
|
||||
ssa.OpAMD64VCVTTPS2DQ256,
|
||||
ssa.OpAMD64VCVTTPS2DQ512,
|
||||
ssa.OpAMD64VPMOVSXBD512,
|
||||
ssa.OpAMD64VPMOVSXWD256,
|
||||
ssa.OpAMD64VPMOVSXWD512,
|
||||
ssa.OpAMD64VPMOVSXBD128,
|
||||
ssa.OpAMD64VPMOVSXWD128,
|
||||
ssa.OpAMD64VPMOVSXBD256,
|
||||
ssa.OpAMD64VPMOVSXWQ512,
|
||||
ssa.OpAMD64VPMOVSXDQ256,
|
||||
ssa.OpAMD64VPMOVSXDQ512,
|
||||
ssa.OpAMD64VPMOVSXBQ128,
|
||||
ssa.OpAMD64VPMOVSXWQ128,
|
||||
ssa.OpAMD64VPMOVSXDQ128,
|
||||
ssa.OpAMD64VPMOVSXBQ256,
|
||||
ssa.OpAMD64VPMOVSXBQ512,
|
||||
ssa.OpAMD64VPMOVZXBW256,
|
||||
ssa.OpAMD64VPMOVZXBW512,
|
||||
ssa.OpAMD64VPMOVZXBW128,
|
||||
ssa.OpAMD64VCVTPS2UDQ128,
|
||||
ssa.OpAMD64VCVTPS2UDQ256,
|
||||
ssa.OpAMD64VCVTPS2UDQ512,
|
||||
ssa.OpAMD64VPMOVZXBD512,
|
||||
ssa.OpAMD64VPMOVZXWD256,
|
||||
ssa.OpAMD64VPMOVZXWD512,
|
||||
ssa.OpAMD64VPMOVZXBD128,
|
||||
ssa.OpAMD64VPMOVZXWD128,
|
||||
ssa.OpAMD64VPMOVZXBD256,
|
||||
ssa.OpAMD64VPMOVZXWQ512,
|
||||
ssa.OpAMD64VPMOVZXDQ256,
|
||||
ssa.OpAMD64VPMOVZXDQ512,
|
||||
ssa.OpAMD64VPMOVZXBQ128,
|
||||
ssa.OpAMD64VPMOVZXWQ128,
|
||||
ssa.OpAMD64VPMOVZXDQ128,
|
||||
ssa.OpAMD64VPMOVSXWQ256,
|
||||
ssa.OpAMD64VPMOVZXBQ256,
|
||||
ssa.OpAMD64VPMOVZXWQ256,
|
||||
ssa.OpAMD64VPMOVZXBQ512,
|
||||
ssa.OpAMD64VPOPCNTB128,
|
||||
ssa.OpAMD64VPOPCNTB256,
|
||||
ssa.OpAMD64VPOPCNTB512,
|
||||
|
|
@ -685,18 +715,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPCOMPRESSQMasked128,
|
||||
ssa.OpAMD64VPCOMPRESSQMasked256,
|
||||
ssa.OpAMD64VPCOMPRESSQMasked512,
|
||||
ssa.OpAMD64VPMOVSXBWMasked256,
|
||||
ssa.OpAMD64VPMOVSXBWMasked512,
|
||||
ssa.OpAMD64VPMOVSXBWMasked128,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked512,
|
||||
ssa.OpAMD64VPMOVSXBDMasked512,
|
||||
ssa.OpAMD64VPMOVSXWDMasked256,
|
||||
ssa.OpAMD64VPMOVSXWDMasked512,
|
||||
ssa.OpAMD64VPMOVSXBDMasked128,
|
||||
ssa.OpAMD64VPMOVSXWDMasked128,
|
||||
ssa.OpAMD64VPMOVSXBDMasked256,
|
||||
ssa.OpAMD64VPMOVSXWQMasked512,
|
||||
ssa.OpAMD64VPMOVSXDQMasked256,
|
||||
ssa.OpAMD64VPMOVSXDQMasked512,
|
||||
ssa.OpAMD64VPMOVSXBQMasked128,
|
||||
ssa.OpAMD64VPMOVSXWQMasked128,
|
||||
ssa.OpAMD64VPMOVSXDQMasked128,
|
||||
ssa.OpAMD64VPMOVSXBQMasked256,
|
||||
ssa.OpAMD64VPMOVSXBQMasked512,
|
||||
ssa.OpAMD64VPMOVZXBWMasked256,
|
||||
ssa.OpAMD64VPMOVZXBWMasked512,
|
||||
ssa.OpAMD64VPMOVZXBWMasked128,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||
ssa.OpAMD64VPMOVZXBDMasked512,
|
||||
ssa.OpAMD64VPMOVZXWDMasked256,
|
||||
ssa.OpAMD64VPMOVZXWDMasked512,
|
||||
ssa.OpAMD64VPMOVZXBDMasked128,
|
||||
ssa.OpAMD64VPMOVZXWDMasked128,
|
||||
ssa.OpAMD64VPMOVZXBDMasked256,
|
||||
ssa.OpAMD64VPMOVZXWQMasked512,
|
||||
ssa.OpAMD64VPMOVZXDQMasked256,
|
||||
ssa.OpAMD64VPMOVZXDQMasked512,
|
||||
ssa.OpAMD64VPMOVZXBQMasked128,
|
||||
ssa.OpAMD64VPMOVZXWQMasked128,
|
||||
ssa.OpAMD64VPMOVZXDQMasked128,
|
||||
ssa.OpAMD64VPMOVSXWQMasked256,
|
||||
ssa.OpAMD64VPMOVZXBQMasked256,
|
||||
ssa.OpAMD64VPMOVZXWQMasked256,
|
||||
ssa.OpAMD64VPMOVZXBQMasked512,
|
||||
ssa.OpAMD64VEXPANDPSMasked128,
|
||||
ssa.OpAMD64VEXPANDPSMasked256,
|
||||
ssa.OpAMD64VEXPANDPSMasked512,
|
||||
|
|
@ -1307,18 +1367,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPCOMPRESSQMasked128,
|
||||
ssa.OpAMD64VPCOMPRESSQMasked256,
|
||||
ssa.OpAMD64VPCOMPRESSQMasked512,
|
||||
ssa.OpAMD64VPMOVSXBWMasked256,
|
||||
ssa.OpAMD64VPMOVSXBWMasked512,
|
||||
ssa.OpAMD64VPMOVSXBWMasked128,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked512,
|
||||
ssa.OpAMD64VPMOVSXBDMasked512,
|
||||
ssa.OpAMD64VPMOVSXWDMasked256,
|
||||
ssa.OpAMD64VPMOVSXWDMasked512,
|
||||
ssa.OpAMD64VPMOVSXBDMasked128,
|
||||
ssa.OpAMD64VPMOVSXWDMasked128,
|
||||
ssa.OpAMD64VPMOVSXBDMasked256,
|
||||
ssa.OpAMD64VPMOVSXWQMasked512,
|
||||
ssa.OpAMD64VPMOVSXDQMasked256,
|
||||
ssa.OpAMD64VPMOVSXDQMasked512,
|
||||
ssa.OpAMD64VPMOVSXBQMasked128,
|
||||
ssa.OpAMD64VPMOVSXWQMasked128,
|
||||
ssa.OpAMD64VPMOVSXDQMasked128,
|
||||
ssa.OpAMD64VPMOVSXBQMasked256,
|
||||
ssa.OpAMD64VPMOVSXBQMasked512,
|
||||
ssa.OpAMD64VPMOVZXBWMasked256,
|
||||
ssa.OpAMD64VPMOVZXBWMasked512,
|
||||
ssa.OpAMD64VPMOVZXBWMasked128,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||
ssa.OpAMD64VPMOVZXBDMasked512,
|
||||
ssa.OpAMD64VPMOVZXWDMasked256,
|
||||
ssa.OpAMD64VPMOVZXWDMasked512,
|
||||
ssa.OpAMD64VPMOVZXBDMasked128,
|
||||
ssa.OpAMD64VPMOVZXWDMasked128,
|
||||
ssa.OpAMD64VPMOVZXBDMasked256,
|
||||
ssa.OpAMD64VPMOVZXWQMasked512,
|
||||
ssa.OpAMD64VPMOVZXDQMasked256,
|
||||
ssa.OpAMD64VPMOVZXDQMasked512,
|
||||
ssa.OpAMD64VPMOVZXBQMasked128,
|
||||
ssa.OpAMD64VPMOVZXWQMasked128,
|
||||
ssa.OpAMD64VPMOVZXDQMasked128,
|
||||
ssa.OpAMD64VPMOVSXWQMasked256,
|
||||
ssa.OpAMD64VPMOVZXBQMasked256,
|
||||
ssa.OpAMD64VPMOVZXWQMasked256,
|
||||
ssa.OpAMD64VPMOVZXBQMasked512,
|
||||
ssa.OpAMD64VDIVPSMasked128,
|
||||
ssa.OpAMD64VDIVPSMasked256,
|
||||
ssa.OpAMD64VDIVPSMasked512,
|
||||
|
|
|
|||
|
|
@ -211,18 +211,48 @@
|
|||
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...)
|
||||
(ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...)
|
||||
(ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
|
||||
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
|
||||
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
|
||||
(ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
|
||||
(ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...)
|
||||
(ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...)
|
||||
(ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...)
|
||||
(ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...)
|
||||
(ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...)
|
||||
(ConvertToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...)
|
||||
(ConvertToInt64Int16x8 ...) => (VPMOVSXWQ512 ...)
|
||||
(ConvertToInt64Int32x4 ...) => (VPMOVSXDQ256 ...)
|
||||
(ConvertToInt64Int32x8 ...) => (VPMOVSXDQ512 ...)
|
||||
(ConvertToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...)
|
||||
(ConvertToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...)
|
||||
(ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...)
|
||||
(ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...)
|
||||
(ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...)
|
||||
(ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...)
|
||||
(ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...)
|
||||
(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
|
||||
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
|
||||
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
|
||||
(ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
|
||||
(ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...)
|
||||
(ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...)
|
||||
(ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...)
|
||||
(ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...)
|
||||
(ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
|
||||
(ConvertToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...)
|
||||
(ConvertToUint64Uint16x8 ...) => (VPMOVZXWQ512 ...)
|
||||
(ConvertToUint64Uint32x4 ...) => (VPMOVZXDQ256 ...)
|
||||
(ConvertToUint64Uint32x8 ...) => (VPMOVZXDQ512 ...)
|
||||
(ConvertToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...)
|
||||
(ConvertToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...)
|
||||
(ConvertToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...)
|
||||
(ConvertToUint64x4Int16x8 ...) => (VPMOVSXWQ256 ...)
|
||||
(ConvertToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...)
|
||||
(ConvertToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...)
|
||||
(ConvertToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...)
|
||||
(CopySignInt8x16 ...) => (VPSIGNB128 ...)
|
||||
(CopySignInt8x32 ...) => (VPSIGNB256 ...)
|
||||
(CopySignInt16x8 ...) => (VPSIGNW128 ...)
|
||||
|
|
@ -1141,10 +1171,20 @@
|
|||
(VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512 [a] x mask)
|
||||
(VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512 [a] x mask)
|
||||
(VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
|
||||
(VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask)
|
||||
(VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask)
|
||||
(VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) => (VPMOVSXWQMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask)
|
||||
(VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask)
|
||||
(VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) => (VPMOVZXWQMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) => (VPMOVZXDQMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) => (VPMOVZXBQMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VDIVPS512 x y) mask) => (VDIVPSMasked512 x y mask)
|
||||
(VMOVDQU64Masked512 (VDIVPD512 x y) mask) => (VDIVPDMasked512 x y mask)
|
||||
(VMOVDQU16Masked512 (VPMADDWD512 x y) mask) => (VPMADDWDMasked512 x y mask)
|
||||
|
|
|
|||
|
|
@ -548,18 +548,78 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXBD512", argLength: 1, reg: w11, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXBDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXBDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXBDMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXBQ128", argLength: 1, reg: v11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXBQ256", argLength: 1, reg: v11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXBQ512", argLength: 1, reg: w11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXBQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXBQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXBQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXBW128", argLength: 1, reg: v11, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXBW256", argLength: 1, reg: v11, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXBW512", argLength: 1, reg: w11, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXDQ128", argLength: 1, reg: v11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXDQ256", argLength: 1, reg: v11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXDQ512", argLength: 1, reg: w11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXDQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXDQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXDQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXWD128", argLength: 1, reg: v11, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXWD256", argLength: 1, reg: v11, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXWD512", argLength: 1, reg: w11, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXWQ128", argLength: 1, reg: v11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXWQ256", argLength: 1, reg: v11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXWQ512", argLength: 1, reg: w11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXBD512", argLength: 1, reg: w11, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXBDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXBDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXBDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXBQ128", argLength: 1, reg: v11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXBQ256", argLength: 1, reg: v11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXBQ512", argLength: 1, reg: w11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXBQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXBQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXBQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXBW128", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXBW256", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXBW512", argLength: 1, reg: w11, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXDQ128", argLength: 1, reg: v11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXDQ256", argLength: 1, reg: v11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXDQ512", argLength: 1, reg: w11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXDQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXDQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXDQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXWD128", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXWD256", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXWD512", argLength: 1, reg: w11, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXWQ128", argLength: 1, reg: v11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXWQ256", argLength: 1, reg: v11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXWQ512", argLength: 1, reg: w11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMOVZXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMOVZXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMOVZXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -203,18 +203,48 @@ func simdGenericOps() []opData {
|
|||
{name: "CompressUint64x2", argLength: 2, commutative: false},
|
||||
{name: "CompressUint64x4", argLength: 2, commutative: false},
|
||||
{name: "CompressUint64x8", argLength: 2, commutative: false},
|
||||
{name: "ConvertToInt16Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt16Int8x32", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32Int16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32Int16x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32x4Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32x4Int16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt32x8Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64Int16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64Int32x4", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64Int32x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64x2Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64x2Int16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64x2Int32x4", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64x4Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToInt64x8Int8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32x4Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint32x8Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64Uint32x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x2Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x2Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x2Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x4Int16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x4Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x4Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "ConvertToUint64x8Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "CopySignInt8x16", argLength: 2, commutative: false},
|
||||
{name: "CopySignInt8x32", argLength: 2, commutative: false},
|
||||
{name: "CopySignInt16x8", argLength: 2, commutative: false},
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1370,6 +1370,15 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpConstBool(v)
|
||||
case OpConstNil:
|
||||
return rewriteValueAMD64_OpConstNil(v)
|
||||
case OpConvertToInt16Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBW256
|
||||
return true
|
||||
case OpConvertToInt16Int8x32:
|
||||
v.Op = OpAMD64VPMOVSXBW512
|
||||
return true
|
||||
case OpConvertToInt16x8Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBW128
|
||||
return true
|
||||
case OpConvertToInt32Float32x16:
|
||||
v.Op = OpAMD64VCVTTPS2DQ512
|
||||
return true
|
||||
|
|
@ -1379,6 +1388,48 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpConvertToInt32Float32x8:
|
||||
v.Op = OpAMD64VCVTTPS2DQ256
|
||||
return true
|
||||
case OpConvertToInt32Int16x16:
|
||||
v.Op = OpAMD64VPMOVSXWD512
|
||||
return true
|
||||
case OpConvertToInt32Int16x8:
|
||||
v.Op = OpAMD64VPMOVSXWD256
|
||||
return true
|
||||
case OpConvertToInt32Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBD512
|
||||
return true
|
||||
case OpConvertToInt32x4Int16x8:
|
||||
v.Op = OpAMD64VPMOVSXWD128
|
||||
return true
|
||||
case OpConvertToInt32x4Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBD128
|
||||
return true
|
||||
case OpConvertToInt32x8Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBD256
|
||||
return true
|
||||
case OpConvertToInt64Int16x8:
|
||||
v.Op = OpAMD64VPMOVSXWQ512
|
||||
return true
|
||||
case OpConvertToInt64Int32x4:
|
||||
v.Op = OpAMD64VPMOVSXDQ256
|
||||
return true
|
||||
case OpConvertToInt64Int32x8:
|
||||
v.Op = OpAMD64VPMOVSXDQ512
|
||||
return true
|
||||
case OpConvertToInt64x2Int16x8:
|
||||
v.Op = OpAMD64VPMOVSXWQ128
|
||||
return true
|
||||
case OpConvertToInt64x2Int32x4:
|
||||
v.Op = OpAMD64VPMOVSXDQ128
|
||||
return true
|
||||
case OpConvertToInt64x2Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBQ128
|
||||
return true
|
||||
case OpConvertToInt64x4Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBQ256
|
||||
return true
|
||||
case OpConvertToInt64x8Int8x16:
|
||||
v.Op = OpAMD64VPMOVSXBQ512
|
||||
return true
|
||||
case OpConvertToUint16Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBW256
|
||||
return true
|
||||
|
|
@ -1403,9 +1454,48 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpConvertToUint32Uint16x8:
|
||||
v.Op = OpAMD64VPMOVZXWD256
|
||||
return true
|
||||
case OpConvertToUint32Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBD512
|
||||
return true
|
||||
case OpConvertToUint32x4Uint16x8:
|
||||
v.Op = OpAMD64VPMOVZXWD128
|
||||
return true
|
||||
case OpConvertToUint32x4Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBD128
|
||||
return true
|
||||
case OpConvertToUint32x8Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBD256
|
||||
return true
|
||||
case OpConvertToUint64Uint16x8:
|
||||
v.Op = OpAMD64VPMOVZXWQ512
|
||||
return true
|
||||
case OpConvertToUint64Uint32x4:
|
||||
v.Op = OpAMD64VPMOVZXDQ256
|
||||
return true
|
||||
case OpConvertToUint64Uint32x8:
|
||||
v.Op = OpAMD64VPMOVZXDQ512
|
||||
return true
|
||||
case OpConvertToUint64x2Uint16x8:
|
||||
v.Op = OpAMD64VPMOVZXWQ128
|
||||
return true
|
||||
case OpConvertToUint64x2Uint32x4:
|
||||
v.Op = OpAMD64VPMOVZXDQ128
|
||||
return true
|
||||
case OpConvertToUint64x2Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBQ128
|
||||
return true
|
||||
case OpConvertToUint64x4Int16x8:
|
||||
v.Op = OpAMD64VPMOVSXWQ256
|
||||
return true
|
||||
case OpConvertToUint64x4Uint16x8:
|
||||
v.Op = OpAMD64VPMOVZXWQ256
|
||||
return true
|
||||
case OpConvertToUint64x4Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBQ256
|
||||
return true
|
||||
case OpConvertToUint64x8Uint8x16:
|
||||
v.Op = OpAMD64VPMOVZXBQ512
|
||||
return true
|
||||
case OpCopySignInt16x16:
|
||||
v.Op = OpAMD64VPSIGNW256
|
||||
return true
|
||||
|
|
@ -26103,6 +26193,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
|
|||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask)
|
||||
// result: (VPMOVSXWDMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVSXWD512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVSXWDMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask)
|
||||
// result: (VPMOVSXWQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVSXWQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVSXWQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask)
|
||||
// result: (VPMOVZXWDMasked512 x mask)
|
||||
for {
|
||||
|
|
@ -26115,6 +26229,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
|
|||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask)
|
||||
// result: (VPMOVZXWQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVZXWQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVZXWQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask)
|
||||
// result: (VPMADDWDMasked512 x y mask)
|
||||
for {
|
||||
|
|
@ -26677,6 +26803,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
|||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask)
|
||||
// result: (VPMOVSXDQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVSXDQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVSXDQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask)
|
||||
// result: (VCVTPS2UDQMasked512 x mask)
|
||||
for {
|
||||
|
|
@ -26689,6 +26827,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
|||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask)
|
||||
// result: (VPMOVZXDQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVZXDQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVZXDQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask)
|
||||
// result: (VDIVPSMasked512 x y mask)
|
||||
for {
|
||||
|
|
@ -28007,6 +28157,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
|
|||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask)
|
||||
// result: (VPMOVSXBWMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVSXBW512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVSXBWMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask)
|
||||
// result: (VPMOVSXBDMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVSXBD512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVSXBDMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask)
|
||||
// result: (VPMOVSXBQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVSXBQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVSXBQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask)
|
||||
// result: (VPMOVZXBWMasked512 x mask)
|
||||
for {
|
||||
|
|
@ -28019,6 +28205,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
|
|||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask)
|
||||
// result: (VPMOVZXBDMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVZXBD512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVZXBDMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask)
|
||||
// result: (VPMOVZXBQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVZXBQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPMOVZXBQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask)
|
||||
// result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask)
|
||||
for {
|
||||
|
|
|
|||
|
|
@ -223,18 +223,48 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt32x8", opLen1(ssa.OpConvertToInt32x8Int8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt64x4", opLen1(ssa.OpConvertToInt64x4Int8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.ConvertToInt64x8", opLen1(ssa.OpConvertToInt64x8Int8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint32x8", opLen1(ssa.OpConvertToUint32x8Uint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Int16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.ConvertToUint64x8", opLen1(ssa.OpConvertToUint64x8Uint8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -1,20 +1,57 @@
|
|||
!sum
|
||||
# Non-truncating conversions
|
||||
# Could be widening int<->int or uint<->uint conversions or float<->int|uint conversions.
|
||||
# int<->int or uint<->uint widening or float<->int|uint conversions.
|
||||
- go: ConvertToInt16
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts element values to int16.
|
||||
- go: ConvertToInt32
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts element values to int32.
|
||||
- go: ConvertToUint32
|
||||
- go: ConvertToInt64
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts element values to uint32.
|
||||
// NAME converts element values to int64.
|
||||
- go: ConvertToUint16
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts element values to uint16.
|
||||
- go: ConvertToUint32
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts element values to uint32.
|
||||
- go: ConvertToUint64
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts element values to uint64.
|
||||
|
||||
# Truncating conversions, int<->int or uint<->uint.
|
||||
# Truncating conversions
|
||||
# int<->int or uint<->uint widening conversions.
|
||||
- go: ConvertToInt16x8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 8 lowest vector element values to int16.
|
||||
- go: ConvertToInt32x4
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 4 lowest vector element values to int32.
|
||||
- go: ConvertToInt32x8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 8 lowest vector element values to int32.
|
||||
- go: ConvertToInt64x2
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 2 lowest vector element values to int64.
|
||||
- go: ConvertToInt64x4
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 4 lowest vector element values to int64.
|
||||
- go: ConvertToInt64x8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 8 lowest vector element values to int64.
|
||||
- go: ConvertToUint16x8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
|
|
@ -23,3 +60,19 @@
|
|||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 4 lowest vector element values to uint32.
|
||||
- go: ConvertToUint32x8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 8 lowest vector element values to uint32.
|
||||
- go: ConvertToUint64x2
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 2 lowest vector element values to uint64.
|
||||
- go: ConvertToUint64x4
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 4 lowest vector element values to uint64.
|
||||
- go: ConvertToUint64x8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME converts 8 lowest vector element values to uint64.
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
!sum
|
||||
# Float <-> Int conversions
|
||||
# TODO: this right now only has Float32 -> Int32|Uint32, more to add.
|
||||
# float32 -> int32
|
||||
- go: ConvertToInt32
|
||||
asm: "VCVTTPS2DQ"
|
||||
in:
|
||||
|
|
@ -12,6 +12,7 @@
|
|||
go: $u
|
||||
base: int
|
||||
elemBits: 32
|
||||
# float32 -> uint32
|
||||
- go: ConvertToUint32
|
||||
asm: "VCVTPS2UDQ"
|
||||
in:
|
||||
|
|
@ -22,8 +23,8 @@
|
|||
base: uint
|
||||
elemBits: 32
|
||||
|
||||
# Uint -> Uint widening conversions.
|
||||
# TODO: this right now only has uint8 -> uint16 and uint16->uint32.
|
||||
# Widening integer conversions.
|
||||
# uint8 -> uint16
|
||||
- go: ConvertToUint16
|
||||
asm: "VPMOVZXBW"
|
||||
in:
|
||||
|
|
@ -36,7 +37,6 @@
|
|||
base: uint
|
||||
elemBits: 16
|
||||
bits: 256
|
||||
|
||||
- go: ConvertToUint16
|
||||
asm: "VPMOVZXBW"
|
||||
in:
|
||||
|
|
@ -49,7 +49,32 @@
|
|||
base: uint
|
||||
elemBits: 16
|
||||
bits: 512
|
||||
|
||||
# int8 -> int16
|
||||
- go: ConvertToInt16
|
||||
asm: "VPMOVSXBW"
|
||||
in:
|
||||
- &i8x16
|
||||
base: int
|
||||
elemBits: 8
|
||||
bits: 128
|
||||
out:
|
||||
- &i16x16
|
||||
base: int
|
||||
elemBits: 16
|
||||
bits: 256
|
||||
- go: ConvertToInt16
|
||||
asm: "VPMOVSXBW"
|
||||
in:
|
||||
- &i8x32
|
||||
base: int
|
||||
elemBits: 8
|
||||
bits: 256
|
||||
out:
|
||||
- &i16x32
|
||||
base: int
|
||||
elemBits: 16
|
||||
bits: 512
|
||||
# uint16->uint32
|
||||
- go: ConvertToUint32
|
||||
asm: "VPMOVZXWD"
|
||||
in:
|
||||
|
|
@ -62,7 +87,6 @@
|
|||
base: uint
|
||||
elemBits: 32
|
||||
bits: 256
|
||||
|
||||
- go: ConvertToUint32
|
||||
asm: "VPMOVZXWD"
|
||||
in:
|
||||
|
|
@ -72,21 +96,237 @@
|
|||
base: uint
|
||||
elemBits: 32
|
||||
bits: 512
|
||||
# int16->int32
|
||||
- go: ConvertToInt32
|
||||
asm: "VPMOVSXWD"
|
||||
in:
|
||||
- &i16x8
|
||||
base: int
|
||||
elemBits: 16
|
||||
bits: 128
|
||||
out:
|
||||
- &i32x8
|
||||
base: int
|
||||
elemBits: 32
|
||||
bits: 256
|
||||
- go: ConvertToInt32
|
||||
asm: "VPMOVSXWD"
|
||||
in:
|
||||
- *i16x16
|
||||
out:
|
||||
- &i32x16
|
||||
base: int
|
||||
elemBits: 32
|
||||
bits: 512
|
||||
# uint32 -> uint64
|
||||
- go: ConvertToUint64
|
||||
asm: "VPMOVZXDQ"
|
||||
in:
|
||||
- &u32x4
|
||||
base: uint
|
||||
elemBits: 32
|
||||
bits: 128
|
||||
out:
|
||||
- &u64x4
|
||||
base: uint
|
||||
elemBits: 64
|
||||
bits: 256
|
||||
- go: ConvertToUint64
|
||||
asm: "VPMOVZXDQ"
|
||||
in:
|
||||
- *u32x8
|
||||
out:
|
||||
- &u64x8
|
||||
base: uint
|
||||
elemBits: 64
|
||||
bits: 512
|
||||
# int32 -> int64
|
||||
- go: ConvertToInt64
|
||||
asm: "VPMOVSXDQ"
|
||||
in:
|
||||
- &i32x4
|
||||
base: int
|
||||
elemBits: 32
|
||||
bits: 128
|
||||
out:
|
||||
- &i64x4
|
||||
base: int
|
||||
elemBits: 64
|
||||
bits: 256
|
||||
- go: ConvertToInt64
|
||||
asm: "VPMOVSXDQ"
|
||||
in:
|
||||
- *i32x8
|
||||
out:
|
||||
- &i64x8
|
||||
base: int
|
||||
elemBits: 64
|
||||
bits: 512
|
||||
# uint16 -> uint64
|
||||
- go: ConvertToUint64
|
||||
asm: "VPMOVZXWQ"
|
||||
in:
|
||||
- *u16x8
|
||||
out:
|
||||
- *u64x8
|
||||
# int16 -> int64
|
||||
- go: ConvertToInt64
|
||||
asm: "VPMOVSXWQ"
|
||||
in:
|
||||
- *i16x8
|
||||
out:
|
||||
- *i64x8
|
||||
# uint8 -> uint32
|
||||
- go: ConvertToUint32
|
||||
asm: "VPMOVZXBD"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u32x16
|
||||
# int8 -> int32
|
||||
- go: ConvertToInt32
|
||||
asm: "VPMOVSXBD"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i32x16
|
||||
|
||||
# Truncating conversions.
|
||||
# TODO: this right now only has uint8->uint16 and uint16->uint32.
|
||||
# uint8->uint16
|
||||
- go: ConvertToUint16x8
|
||||
asm: "VPMOVZXBW"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u16x8
|
||||
# int8->int16
|
||||
- go: ConvertToInt16x8
|
||||
asm: "VPMOVSXBW"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i16x8
|
||||
# uint16->uint32
|
||||
- go: ConvertToUint32x4
|
||||
asm: "VPMOVZXWD"
|
||||
in:
|
||||
- *u16x8
|
||||
out:
|
||||
- &u32x4
|
||||
- *u32x4
|
||||
# int16->int32
|
||||
- go: ConvertToInt32x4
|
||||
asm: "VPMOVSXWD"
|
||||
in:
|
||||
- *i16x8
|
||||
out:
|
||||
- *i32x4
|
||||
# uint32 -> uint64
|
||||
- go: ConvertToUint64x2
|
||||
asm: "VPMOVZXDQ"
|
||||
in:
|
||||
- *u32x4
|
||||
out:
|
||||
- &u64x2
|
||||
base: uint
|
||||
elemBits: 32
|
||||
elemBits: 64
|
||||
bits: 128
|
||||
# int32 -> int64
|
||||
- go: ConvertToInt64x2
|
||||
asm: "VPMOVSXDQ"
|
||||
in:
|
||||
- *i32x4
|
||||
out:
|
||||
- &i64x2
|
||||
base: int
|
||||
elemBits: 64
|
||||
bits: 128
|
||||
# uint16 -> uint64
|
||||
- go: ConvertToUint64x2
|
||||
asm: "VPMOVZXWQ"
|
||||
in:
|
||||
- *u16x8
|
||||
out:
|
||||
- *u64x2
|
||||
- go: ConvertToUint64x4
|
||||
asm: "VPMOVZXWQ"
|
||||
in:
|
||||
- *u16x8
|
||||
out:
|
||||
- *u64x4
|
||||
# int16 -> int64
|
||||
- go: ConvertToInt64x2
|
||||
asm: "VPMOVSXWQ"
|
||||
in:
|
||||
- *i16x8
|
||||
out:
|
||||
- *i64x2
|
||||
- go: ConvertToInt64x4 # int16 -> int64 is a signed (sign-extending) conversion, so the method is Int64x4, not Uint64x4
|
||||
asm: "VPMOVSXWQ"
|
||||
in:
|
||||
- *i16x8
|
||||
out:
|
||||
- *i64x4
|
||||
# uint8 -> uint32
|
||||
- go: ConvertToUint32x4
|
||||
asm: "VPMOVZXBD"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u32x4
|
||||
- go: ConvertToUint32x8
|
||||
asm: "VPMOVZXBD"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u32x8
|
||||
# int8 -> int32
|
||||
- go: ConvertToInt32x4
|
||||
asm: "VPMOVSXBD"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i32x4
|
||||
- go: ConvertToInt32x8
|
||||
asm: "VPMOVSXBD"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i32x8
|
||||
# uint8 -> uint64
|
||||
- go: ConvertToUint64x2
|
||||
asm: "VPMOVZXBQ"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u64x2
|
||||
- go: ConvertToUint64x4
|
||||
asm: "VPMOVZXBQ"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u64x4
|
||||
- go: ConvertToUint64x8
|
||||
asm: "VPMOVZXBQ"
|
||||
in:
|
||||
- *u8x16
|
||||
out:
|
||||
- *u64x8
|
||||
# int8 -> int64
|
||||
- go: ConvertToInt64x2
|
||||
asm: "VPMOVSXBQ"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i64x2
|
||||
- go: ConvertToInt64x4
|
||||
asm: "VPMOVSXBQ"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i64x4
|
||||
- go: ConvertToInt64x8
|
||||
asm: "VPMOVSXBQ"
|
||||
in:
|
||||
- *i8x16
|
||||
out:
|
||||
- *i64x8
|
||||
|
|
@ -1195,6 +1195,25 @@ func (x Uint64x4) Compress(mask Mask64x4) Uint64x4
|
|||
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
|
||||
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
|
||||
|
||||
/* ConvertToInt16 */
|
||||
|
||||
// ConvertToInt16 converts element values to int16.
|
||||
//
|
||||
// Asm: VPMOVSXBW, CPU Feature: AVX2
|
||||
func (x Int8x16) ConvertToInt16() Int16x16
|
||||
|
||||
// ConvertToInt16 converts element values to int16.
|
||||
//
|
||||
// Asm: VPMOVSXBW, CPU Feature: AVX512
|
||||
func (x Int8x32) ConvertToInt16() Int16x32
|
||||
|
||||
/* ConvertToInt16x8 */
|
||||
|
||||
// ConvertToInt16x8 converts 8 lowest vector element values to int16.
|
||||
//
|
||||
// Asm: VPMOVSXBW, CPU Feature: AVX
|
||||
func (x Int8x16) ConvertToInt16x8() Int16x8
|
||||
|
||||
/* ConvertToInt32 */
|
||||
|
||||
// ConvertToInt32 converts element values to int32.
|
||||
|
|
@ -1212,6 +1231,88 @@ func (x Float32x8) ConvertToInt32() Int32x8
|
|||
// Asm: VCVTTPS2DQ, CPU Feature: AVX512
|
||||
func (x Float32x16) ConvertToInt32() Int32x16
|
||||
|
||||
// ConvertToInt32 converts element values to int32.
|
||||
//
|
||||
// Asm: VPMOVSXBD, CPU Feature: AVX512
|
||||
func (x Int8x16) ConvertToInt32() Int32x16
|
||||
|
||||
// ConvertToInt32 converts element values to int32.
|
||||
//
|
||||
// Asm: VPMOVSXWD, CPU Feature: AVX2
|
||||
func (x Int16x8) ConvertToInt32() Int32x8
|
||||
|
||||
// ConvertToInt32 converts element values to int32.
|
||||
//
|
||||
// Asm: VPMOVSXWD, CPU Feature: AVX512
|
||||
func (x Int16x16) ConvertToInt32() Int32x16
|
||||
|
||||
/* ConvertToInt32x4 */
|
||||
|
||||
// ConvertToInt32x4 converts 4 lowest vector element values to int32.
|
||||
//
|
||||
// Asm: VPMOVSXBD, CPU Feature: AVX
|
||||
func (x Int8x16) ConvertToInt32x4() Int32x4
|
||||
|
||||
// ConvertToInt32x4 converts 4 lowest vector element values to int32.
|
||||
//
|
||||
// Asm: VPMOVSXWD, CPU Feature: AVX
|
||||
func (x Int16x8) ConvertToInt32x4() Int32x4
|
||||
|
||||
/* ConvertToInt32x8 */
|
||||
|
||||
// ConvertToInt32x8 converts 8 lowest vector element values to int32.
|
||||
//
|
||||
// Asm: VPMOVSXBD, CPU Feature: AVX2
|
||||
func (x Int8x16) ConvertToInt32x8() Int32x8
|
||||
|
||||
/* ConvertToInt64 */
|
||||
|
||||
// ConvertToInt64 converts element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXWQ, CPU Feature: AVX512
|
||||
func (x Int16x8) ConvertToInt64() Int64x8
|
||||
|
||||
// ConvertToInt64 converts element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXDQ, CPU Feature: AVX2
|
||||
func (x Int32x4) ConvertToInt64() Int64x4
|
||||
|
||||
// ConvertToInt64 converts element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXDQ, CPU Feature: AVX512
|
||||
func (x Int32x8) ConvertToInt64() Int64x8
|
||||
|
||||
/* ConvertToInt64x2 */
|
||||
|
||||
// ConvertToInt64x2 converts 2 lowest vector element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXBQ, CPU Feature: AVX
|
||||
func (x Int8x16) ConvertToInt64x2() Int64x2
|
||||
|
||||
// ConvertToInt64x2 converts 2 lowest vector element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXWQ, CPU Feature: AVX
|
||||
func (x Int16x8) ConvertToInt64x2() Int64x2
|
||||
|
||||
// ConvertToInt64x2 converts 2 lowest vector element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXDQ, CPU Feature: AVX
|
||||
func (x Int32x4) ConvertToInt64x2() Int64x2
|
||||
|
||||
/* ConvertToInt64x4 */
|
||||
|
||||
// ConvertToInt64x4 converts 4 lowest vector element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXBQ, CPU Feature: AVX2
|
||||
func (x Int8x16) ConvertToInt64x4() Int64x4
|
||||
|
||||
/* ConvertToInt64x8 */
|
||||
|
||||
// ConvertToInt64x8 converts 8 lowest vector element values to int64.
|
||||
//
|
||||
// Asm: VPMOVSXBQ, CPU Feature: AVX512
|
||||
func (x Int8x16) ConvertToInt64x8() Int64x8
|
||||
|
||||
/* ConvertToUint16 */
|
||||
|
||||
// ConvertToUint16 converts element values to uint16.
|
||||
|
|
@ -1248,6 +1349,11 @@ func (x Float32x8) ConvertToUint32() Uint32x8
|
|||
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
||||
func (x Float32x16) ConvertToUint32() Uint32x16
|
||||
|
||||
// ConvertToUint32 converts element values to uint32.
|
||||
//
|
||||
// Asm: VPMOVZXBD, CPU Feature: AVX512
|
||||
func (x Uint8x16) ConvertToUint32() Uint32x16
|
||||
|
||||
// ConvertToUint32 converts element values to uint32.
|
||||
//
|
||||
// Asm: VPMOVZXWD, CPU Feature: AVX2
|
||||
|
|
@ -1260,11 +1366,81 @@ func (x Uint16x16) ConvertToUint32() Uint32x16
|
|||
|
||||
/* ConvertToUint32x4 */
|
||||
|
||||
// ConvertToUint32x4 converts 4 lowest vector element values to uint32.
|
||||
//
|
||||
// Asm: VPMOVZXBD, CPU Feature: AVX
|
||||
func (x Uint8x16) ConvertToUint32x4() Uint32x4
|
||||
|
||||
// ConvertToUint32x4 converts 4 lowest vector element values to uint32.
|
||||
//
|
||||
// Asm: VPMOVZXWD, CPU Feature: AVX
|
||||
func (x Uint16x8) ConvertToUint32x4() Uint32x4
|
||||
|
||||
/* ConvertToUint32x8 */
|
||||
|
||||
// ConvertToUint32x8 converts 8 lowest vector element values to uint32.
|
||||
//
|
||||
// Asm: VPMOVZXBD, CPU Feature: AVX2
|
||||
func (x Uint8x16) ConvertToUint32x8() Uint32x8
|
||||
|
||||
/* ConvertToUint64 */
|
||||
|
||||
// ConvertToUint64 converts element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXWQ, CPU Feature: AVX512
|
||||
func (x Uint16x8) ConvertToUint64() Uint64x8
|
||||
|
||||
// ConvertToUint64 converts element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXDQ, CPU Feature: AVX2
|
||||
func (x Uint32x4) ConvertToUint64() Uint64x4
|
||||
|
||||
// ConvertToUint64 converts element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXDQ, CPU Feature: AVX512
|
||||
func (x Uint32x8) ConvertToUint64() Uint64x8
|
||||
|
||||
/* ConvertToUint64x2 */
|
||||
|
||||
// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXBQ, CPU Feature: AVX
|
||||
func (x Uint8x16) ConvertToUint64x2() Uint64x2
|
||||
|
||||
// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXWQ, CPU Feature: AVX
|
||||
func (x Uint16x8) ConvertToUint64x2() Uint64x2
|
||||
|
||||
// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXDQ, CPU Feature: AVX
|
||||
func (x Uint32x4) ConvertToUint64x2() Uint64x2
|
||||
|
||||
/* ConvertToUint64x4 */
|
||||
|
||||
// ConvertToUint64x4 sign-extends the 4 lowest vector element values to int64.
// NOTE(review): despite "Uint64" in the name, the receiver is Int16x8 and the
// result is Int64x4; the name looks like a generator typo for ConvertToInt64x4 - confirm.
|
||||
//
|
||||
// Asm: VPMOVSXWQ, CPU Feature: AVX2
|
||||
func (x Int16x8) ConvertToUint64x4() Int64x4
|
||||
|
||||
// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXBQ, CPU Feature: AVX2
|
||||
func (x Uint8x16) ConvertToUint64x4() Uint64x4
|
||||
|
||||
// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXWQ, CPU Feature: AVX2
|
||||
func (x Uint16x8) ConvertToUint64x4() Uint64x4
|
||||
|
||||
/* ConvertToUint64x8 */
|
||||
|
||||
// ConvertToUint64x8 converts 8 lowest vector element values to uint64.
|
||||
//
|
||||
// Asm: VPMOVZXBQ, CPU Feature: AVX512
|
||||
func (x Uint8x16) ConvertToUint64x8() Uint64x8
|
||||
|
||||
/* CopySign */
|
||||
|
||||
// CopySign returns the product of the first operand with -1, 0, or 1,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue