[dev.simd] cmd/compile, simd: complete u?int widening conversions

Change-Id: I21da09261b6b278768d99229fe2db387aef1e812
Reviewed-on: https://go-review.googlesource.com/c/go/+/697915
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-08-21 02:47:53 +00:00
parent 6af8881adb
commit f4c41d9922
10 changed files with 1993 additions and 14 deletions

View file

@ -41,18 +41,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPBROADCASTW512,
ssa.OpAMD64VPBROADCASTD512,
ssa.OpAMD64VPBROADCASTQ512,
ssa.OpAMD64VPMOVSXBW256,
ssa.OpAMD64VPMOVSXBW512,
ssa.OpAMD64VPMOVSXBW128,
ssa.OpAMD64VCVTTPS2DQ128,
ssa.OpAMD64VCVTTPS2DQ256,
ssa.OpAMD64VCVTTPS2DQ512,
ssa.OpAMD64VPMOVSXBD512,
ssa.OpAMD64VPMOVSXWD256,
ssa.OpAMD64VPMOVSXWD512,
ssa.OpAMD64VPMOVSXBD128,
ssa.OpAMD64VPMOVSXWD128,
ssa.OpAMD64VPMOVSXBD256,
ssa.OpAMD64VPMOVSXWQ512,
ssa.OpAMD64VPMOVSXDQ256,
ssa.OpAMD64VPMOVSXDQ512,
ssa.OpAMD64VPMOVSXBQ128,
ssa.OpAMD64VPMOVSXWQ128,
ssa.OpAMD64VPMOVSXDQ128,
ssa.OpAMD64VPMOVSXBQ256,
ssa.OpAMD64VPMOVSXBQ512,
ssa.OpAMD64VPMOVZXBW256,
ssa.OpAMD64VPMOVZXBW512,
ssa.OpAMD64VPMOVZXBW128,
ssa.OpAMD64VCVTPS2UDQ128,
ssa.OpAMD64VCVTPS2UDQ256,
ssa.OpAMD64VCVTPS2UDQ512,
ssa.OpAMD64VPMOVZXBD512,
ssa.OpAMD64VPMOVZXWD256,
ssa.OpAMD64VPMOVZXWD512,
ssa.OpAMD64VPMOVZXBD128,
ssa.OpAMD64VPMOVZXWD128,
ssa.OpAMD64VPMOVZXBD256,
ssa.OpAMD64VPMOVZXWQ512,
ssa.OpAMD64VPMOVZXDQ256,
ssa.OpAMD64VPMOVZXDQ512,
ssa.OpAMD64VPMOVZXBQ128,
ssa.OpAMD64VPMOVZXWQ128,
ssa.OpAMD64VPMOVZXDQ128,
ssa.OpAMD64VPMOVSXWQ256,
ssa.OpAMD64VPMOVZXBQ256,
ssa.OpAMD64VPMOVZXWQ256,
ssa.OpAMD64VPMOVZXBQ512,
ssa.OpAMD64VPOPCNTB128,
ssa.OpAMD64VPOPCNTB256,
ssa.OpAMD64VPOPCNTB512,
@ -685,18 +715,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPCOMPRESSQMasked128,
ssa.OpAMD64VPCOMPRESSQMasked256,
ssa.OpAMD64VPCOMPRESSQMasked512,
ssa.OpAMD64VPMOVSXBWMasked256,
ssa.OpAMD64VPMOVSXBWMasked512,
ssa.OpAMD64VPMOVSXBWMasked128,
ssa.OpAMD64VCVTTPS2DQMasked128,
ssa.OpAMD64VCVTTPS2DQMasked256,
ssa.OpAMD64VCVTTPS2DQMasked512,
ssa.OpAMD64VPMOVSXBDMasked512,
ssa.OpAMD64VPMOVSXWDMasked256,
ssa.OpAMD64VPMOVSXWDMasked512,
ssa.OpAMD64VPMOVSXBDMasked128,
ssa.OpAMD64VPMOVSXWDMasked128,
ssa.OpAMD64VPMOVSXBDMasked256,
ssa.OpAMD64VPMOVSXWQMasked512,
ssa.OpAMD64VPMOVSXDQMasked256,
ssa.OpAMD64VPMOVSXDQMasked512,
ssa.OpAMD64VPMOVSXBQMasked128,
ssa.OpAMD64VPMOVSXWQMasked128,
ssa.OpAMD64VPMOVSXDQMasked128,
ssa.OpAMD64VPMOVSXBQMasked256,
ssa.OpAMD64VPMOVSXBQMasked512,
ssa.OpAMD64VPMOVZXBWMasked256,
ssa.OpAMD64VPMOVZXBWMasked512,
ssa.OpAMD64VPMOVZXBWMasked128,
ssa.OpAMD64VCVTPS2UDQMasked128,
ssa.OpAMD64VCVTPS2UDQMasked256,
ssa.OpAMD64VCVTPS2UDQMasked512,
ssa.OpAMD64VPMOVZXBDMasked512,
ssa.OpAMD64VPMOVZXWDMasked256,
ssa.OpAMD64VPMOVZXWDMasked512,
ssa.OpAMD64VPMOVZXBDMasked128,
ssa.OpAMD64VPMOVZXWDMasked128,
ssa.OpAMD64VPMOVZXBDMasked256,
ssa.OpAMD64VPMOVZXWQMasked512,
ssa.OpAMD64VPMOVZXDQMasked256,
ssa.OpAMD64VPMOVZXDQMasked512,
ssa.OpAMD64VPMOVZXBQMasked128,
ssa.OpAMD64VPMOVZXWQMasked128,
ssa.OpAMD64VPMOVZXDQMasked128,
ssa.OpAMD64VPMOVSXWQMasked256,
ssa.OpAMD64VPMOVZXBQMasked256,
ssa.OpAMD64VPMOVZXWQMasked256,
ssa.OpAMD64VPMOVZXBQMasked512,
ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512,
@ -1307,18 +1367,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPCOMPRESSQMasked128,
ssa.OpAMD64VPCOMPRESSQMasked256,
ssa.OpAMD64VPCOMPRESSQMasked512,
ssa.OpAMD64VPMOVSXBWMasked256,
ssa.OpAMD64VPMOVSXBWMasked512,
ssa.OpAMD64VPMOVSXBWMasked128,
ssa.OpAMD64VCVTTPS2DQMasked128,
ssa.OpAMD64VCVTTPS2DQMasked256,
ssa.OpAMD64VCVTTPS2DQMasked512,
ssa.OpAMD64VPMOVSXBDMasked512,
ssa.OpAMD64VPMOVSXWDMasked256,
ssa.OpAMD64VPMOVSXWDMasked512,
ssa.OpAMD64VPMOVSXBDMasked128,
ssa.OpAMD64VPMOVSXWDMasked128,
ssa.OpAMD64VPMOVSXBDMasked256,
ssa.OpAMD64VPMOVSXWQMasked512,
ssa.OpAMD64VPMOVSXDQMasked256,
ssa.OpAMD64VPMOVSXDQMasked512,
ssa.OpAMD64VPMOVSXBQMasked128,
ssa.OpAMD64VPMOVSXWQMasked128,
ssa.OpAMD64VPMOVSXDQMasked128,
ssa.OpAMD64VPMOVSXBQMasked256,
ssa.OpAMD64VPMOVSXBQMasked512,
ssa.OpAMD64VPMOVZXBWMasked256,
ssa.OpAMD64VPMOVZXBWMasked512,
ssa.OpAMD64VPMOVZXBWMasked128,
ssa.OpAMD64VCVTPS2UDQMasked128,
ssa.OpAMD64VCVTPS2UDQMasked256,
ssa.OpAMD64VCVTPS2UDQMasked512,
ssa.OpAMD64VPMOVZXBDMasked512,
ssa.OpAMD64VPMOVZXWDMasked256,
ssa.OpAMD64VPMOVZXWDMasked512,
ssa.OpAMD64VPMOVZXBDMasked128,
ssa.OpAMD64VPMOVZXWDMasked128,
ssa.OpAMD64VPMOVZXBDMasked256,
ssa.OpAMD64VPMOVZXWQMasked512,
ssa.OpAMD64VPMOVZXDQMasked256,
ssa.OpAMD64VPMOVZXDQMasked512,
ssa.OpAMD64VPMOVZXBQMasked128,
ssa.OpAMD64VPMOVZXWQMasked128,
ssa.OpAMD64VPMOVZXDQMasked128,
ssa.OpAMD64VPMOVSXWQMasked256,
ssa.OpAMD64VPMOVZXBQMasked256,
ssa.OpAMD64VPMOVZXWQMasked256,
ssa.OpAMD64VPMOVZXBQMasked512,
ssa.OpAMD64VDIVPSMasked128,
ssa.OpAMD64VDIVPSMasked256,
ssa.OpAMD64VDIVPSMasked512,

View file

@ -211,18 +211,48 @@
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...)
(ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...)
(ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
(ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
(ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...)
(ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...)
(ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...)
(ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...)
(ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...)
(ConvertToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...)
(ConvertToInt64Int16x8 ...) => (VPMOVSXWQ512 ...)
(ConvertToInt64Int32x4 ...) => (VPMOVSXDQ256 ...)
(ConvertToInt64Int32x8 ...) => (VPMOVSXDQ512 ...)
(ConvertToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...)
(ConvertToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...)
(ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...)
(ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...)
(ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...)
(ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...)
(ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...)
(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
(ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
(ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...)
(ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...)
(ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...)
(ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...)
(ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
(ConvertToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...)
(ConvertToUint64Uint16x8 ...) => (VPMOVZXWQ512 ...)
(ConvertToUint64Uint32x4 ...) => (VPMOVZXDQ256 ...)
(ConvertToUint64Uint32x8 ...) => (VPMOVZXDQ512 ...)
(ConvertToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...)
(ConvertToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...)
(ConvertToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...)
(ConvertToUint64x4Int16x8 ...) => (VPMOVSXWQ256 ...)
(ConvertToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...)
(ConvertToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...)
(ConvertToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...)
(CopySignInt8x16 ...) => (VPSIGNB128 ...)
(CopySignInt8x32 ...) => (VPSIGNB256 ...)
(CopySignInt16x8 ...) => (VPSIGNW128 ...)
@ -1141,10 +1171,20 @@
(VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512 [a] x mask)
(VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512 [a] x mask)
(VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
(VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask)
(VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512 x mask)
(VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask)
(VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask)
(VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) => (VPMOVSXWQMasked512 x mask)
(VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask)
(VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask)
(VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask)
(VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512 x mask)
(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask)
(VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask)
(VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) => (VPMOVZXWQMasked512 x mask)
(VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) => (VPMOVZXDQMasked512 x mask)
(VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) => (VPMOVZXBQMasked512 x mask)
(VMOVDQU32Masked512 (VDIVPS512 x y) mask) => (VDIVPSMasked512 x y mask)
(VMOVDQU64Masked512 (VDIVPD512 x y) mask) => (VDIVPDMasked512 x y mask)
(VMOVDQU16Masked512 (VPMADDWD512 x y) mask) => (VPMADDWDMasked512 x y mask)

View file

@ -548,18 +548,78 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXBD512", argLength: 1, reg: w11, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXBDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXBDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXBDMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXBQ128", argLength: 1, reg: v11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXBQ256", argLength: 1, reg: v11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXBQ512", argLength: 1, reg: w11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXBQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXBQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXBQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXBW128", argLength: 1, reg: v11, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXBW256", argLength: 1, reg: v11, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXBW512", argLength: 1, reg: w11, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXDQ128", argLength: 1, reg: v11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXDQ256", argLength: 1, reg: v11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXDQ512", argLength: 1, reg: w11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXDQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXDQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXDQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXWD128", argLength: 1, reg: v11, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXWD256", argLength: 1, reg: v11, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXWD512", argLength: 1, reg: w11, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXWQ128", argLength: 1, reg: v11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXWQ256", argLength: 1, reg: v11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXWQ512", argLength: 1, reg: w11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXBD512", argLength: 1, reg: w11, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXBDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXBDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXBDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXBQ128", argLength: 1, reg: v11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXBQ256", argLength: 1, reg: v11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXBQ512", argLength: 1, reg: w11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXBQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXBQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXBQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXBW128", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXBW256", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXBW512", argLength: 1, reg: w11, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXDQ128", argLength: 1, reg: v11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXDQ256", argLength: 1, reg: v11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXDQ512", argLength: 1, reg: w11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXDQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXDQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXDQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXWD128", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXWD256", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXWD512", argLength: 1, reg: w11, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXWQ128", argLength: 1, reg: v11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXWQ256", argLength: 1, reg: v11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXWQ512", argLength: 1, reg: w11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMOVZXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVZXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVZXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},

View file

@ -203,18 +203,48 @@ func simdGenericOps() []opData {
{name: "CompressUint64x2", argLength: 2, commutative: false},
{name: "CompressUint64x4", argLength: 2, commutative: false},
{name: "CompressUint64x8", argLength: 2, commutative: false},
{name: "ConvertToInt16Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt16Int8x32", argLength: 1, commutative: false},
{name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
{name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
{name: "ConvertToInt32Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt32Int16x8", argLength: 1, commutative: false},
{name: "ConvertToInt32Int16x16", argLength: 1, commutative: false},
{name: "ConvertToInt32x4Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt32x4Int16x8", argLength: 1, commutative: false},
{name: "ConvertToInt32x8Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt64Int16x8", argLength: 1, commutative: false},
{name: "ConvertToInt64Int32x4", argLength: 1, commutative: false},
{name: "ConvertToInt64Int32x8", argLength: 1, commutative: false},
{name: "ConvertToInt64x2Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt64x2Int16x8", argLength: 1, commutative: false},
{name: "ConvertToInt64x2Int32x4", argLength: 1, commutative: false},
{name: "ConvertToInt64x4Int8x16", argLength: 1, commutative: false},
{name: "ConvertToInt64x8Int8x16", argLength: 1, commutative: false},
{name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false},
{name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
{name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
{name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
{name: "ConvertToUint32Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false},
{name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false},
{name: "ConvertToUint32x4Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false},
{name: "ConvertToUint32x8Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint64Uint16x8", argLength: 1, commutative: false},
{name: "ConvertToUint64Uint32x4", argLength: 1, commutative: false},
{name: "ConvertToUint64Uint32x8", argLength: 1, commutative: false},
{name: "ConvertToUint64x2Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint64x2Uint16x8", argLength: 1, commutative: false},
{name: "ConvertToUint64x2Uint32x4", argLength: 1, commutative: false},
{name: "ConvertToUint64x4Int16x8", argLength: 1, commutative: false},
{name: "ConvertToUint64x4Uint8x16", argLength: 1, commutative: false},
{name: "ConvertToUint64x4Uint16x8", argLength: 1, commutative: false},
{name: "ConvertToUint64x8Uint8x16", argLength: 1, commutative: false},
{name: "CopySignInt8x16", argLength: 2, commutative: false},
{name: "CopySignInt8x32", argLength: 2, commutative: false},
{name: "CopySignInt16x8", argLength: 2, commutative: false},

File diff suppressed because it is too large Load diff

View file

@ -1370,6 +1370,15 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpConstBool(v)
case OpConstNil:
return rewriteValueAMD64_OpConstNil(v)
case OpConvertToInt16Int8x16:
v.Op = OpAMD64VPMOVSXBW256
return true
case OpConvertToInt16Int8x32:
v.Op = OpAMD64VPMOVSXBW512
return true
case OpConvertToInt16x8Int8x16:
v.Op = OpAMD64VPMOVSXBW128
return true
case OpConvertToInt32Float32x16:
v.Op = OpAMD64VCVTTPS2DQ512
return true
@ -1379,6 +1388,48 @@ func rewriteValueAMD64(v *Value) bool {
case OpConvertToInt32Float32x8:
v.Op = OpAMD64VCVTTPS2DQ256
return true
case OpConvertToInt32Int16x16:
v.Op = OpAMD64VPMOVSXWD512
return true
case OpConvertToInt32Int16x8:
v.Op = OpAMD64VPMOVSXWD256
return true
case OpConvertToInt32Int8x16:
v.Op = OpAMD64VPMOVSXBD512
return true
case OpConvertToInt32x4Int16x8:
v.Op = OpAMD64VPMOVSXWD128
return true
case OpConvertToInt32x4Int8x16:
v.Op = OpAMD64VPMOVSXBD128
return true
case OpConvertToInt32x8Int8x16:
v.Op = OpAMD64VPMOVSXBD256
return true
case OpConvertToInt64Int16x8:
v.Op = OpAMD64VPMOVSXWQ512
return true
case OpConvertToInt64Int32x4:
v.Op = OpAMD64VPMOVSXDQ256
return true
case OpConvertToInt64Int32x8:
v.Op = OpAMD64VPMOVSXDQ512
return true
case OpConvertToInt64x2Int16x8:
v.Op = OpAMD64VPMOVSXWQ128
return true
case OpConvertToInt64x2Int32x4:
v.Op = OpAMD64VPMOVSXDQ128
return true
case OpConvertToInt64x2Int8x16:
v.Op = OpAMD64VPMOVSXBQ128
return true
case OpConvertToInt64x4Int8x16:
v.Op = OpAMD64VPMOVSXBQ256
return true
case OpConvertToInt64x8Int8x16:
v.Op = OpAMD64VPMOVSXBQ512
return true
case OpConvertToUint16Uint8x16:
v.Op = OpAMD64VPMOVZXBW256
return true
@ -1403,9 +1454,48 @@ func rewriteValueAMD64(v *Value) bool {
case OpConvertToUint32Uint16x8:
v.Op = OpAMD64VPMOVZXWD256
return true
case OpConvertToUint32Uint8x16:
v.Op = OpAMD64VPMOVZXBD512
return true
case OpConvertToUint32x4Uint16x8:
v.Op = OpAMD64VPMOVZXWD128
return true
case OpConvertToUint32x4Uint8x16:
v.Op = OpAMD64VPMOVZXBD128
return true
case OpConvertToUint32x8Uint8x16:
v.Op = OpAMD64VPMOVZXBD256
return true
case OpConvertToUint64Uint16x8:
v.Op = OpAMD64VPMOVZXWQ512
return true
case OpConvertToUint64Uint32x4:
v.Op = OpAMD64VPMOVZXDQ256
return true
case OpConvertToUint64Uint32x8:
v.Op = OpAMD64VPMOVZXDQ512
return true
case OpConvertToUint64x2Uint16x8:
v.Op = OpAMD64VPMOVZXWQ128
return true
case OpConvertToUint64x2Uint32x4:
v.Op = OpAMD64VPMOVZXDQ128
return true
case OpConvertToUint64x2Uint8x16:
v.Op = OpAMD64VPMOVZXBQ128
return true
case OpConvertToUint64x4Int16x8:
v.Op = OpAMD64VPMOVSXWQ256
return true
case OpConvertToUint64x4Uint16x8:
v.Op = OpAMD64VPMOVZXWQ256
return true
case OpConvertToUint64x4Uint8x16:
v.Op = OpAMD64VPMOVZXBQ256
return true
case OpConvertToUint64x8Uint8x16:
v.Op = OpAMD64VPMOVZXBQ512
return true
case OpCopySignInt16x16:
v.Op = OpAMD64VPSIGNW256
return true
@ -26103,6 +26193,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask)
// result: (VPMOVSXWDMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVSXWD512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVSXWDMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask)
// result: (VPMOVSXWQMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVSXWQ512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVSXWQMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask)
// result: (VPMOVZXWDMasked512 x mask)
for {
@ -26115,6 +26229,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask)
// result: (VPMOVZXWQMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVZXWQ512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVZXWQMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask)
// result: (VPMADDWDMasked512 x y mask)
for {
@ -26677,6 +26803,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask)
// result: (VPMOVSXDQMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVSXDQ512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVSXDQMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask)
// result: (VCVTPS2UDQMasked512 x mask)
for {
@ -26689,6 +26827,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask)
// result: (VPMOVZXDQMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVZXDQ512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVZXDQMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask)
// result: (VDIVPSMasked512 x y mask)
for {
@ -28007,6 +28157,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask)
// result: (VPMOVSXBWMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVSXBW512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVSXBWMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask)
// result: (VPMOVSXBDMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVSXBD512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVSXBDMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask)
// result: (VPMOVSXBQMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVSXBQ512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVSXBQMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask)
// result: (VPMOVZXBWMasked512 x mask)
for {
@ -28019,6 +28205,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask)
// result: (VPMOVZXBDMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVZXBD512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVZXBDMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask)
// result: (VPMOVZXBQMasked512 x mask)
for {
if v_0.Op != OpAMD64VPMOVZXBQ512 {
break
}
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPMOVZXBQMasked512)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask)
// result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask)
for {

View file

@ -223,18 +223,48 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x32.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt32x8", opLen1(ssa.OpConvertToInt32x8Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt64x4", opLen1(ssa.OpConvertToInt64x4Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.ConvertToInt64x8", opLen1(ssa.OpConvertToInt64x8Int8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint32x8", opLen1(ssa.OpConvertToUint32x8Uint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Int16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x16.ConvertToUint64x8", opLen1(ssa.OpConvertToUint64x8Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)

View file

@ -1,20 +1,57 @@
!sum
# Non-truncating conversions
# Could be widening int<->int or uint<->uint conversions or float<->int|uint conversions.
# int<->int or uint<->uint widening or float<->int|uint conversions.
- go: ConvertToInt16
commutative: false
documentation: !string |-
// NAME converts element values to int16.
- go: ConvertToInt32
commutative: false
documentation: !string |-
// NAME converts element values to int32.
- go: ConvertToUint32
- go: ConvertToInt64
commutative: false
documentation: !string |-
// NAME converts element values to uint32.
// NAME converts element values to int64.
- go: ConvertToUint16
commutative: false
documentation: !string |-
// NAME converts element values to uint16.
- go: ConvertToUint32
commutative: false
documentation: !string |-
// NAME converts element values to uint32.
- go: ConvertToUint64
commutative: false
documentation: !string |-
// NAME converts element values to uint64.
# Truncating conversions, int<->int or uint<->uint.
# Truncating conversions
# int<->int or uint<->uint widening conversions that convert only the lowest elements of the input vector.
- go: ConvertToInt16x8
commutative: false
documentation: !string |-
// NAME converts 8 lowest vector element values to int16.
- go: ConvertToInt32x4
commutative: false
documentation: !string |-
// NAME converts 4 lowest vector element values to int32.
- go: ConvertToInt32x8
commutative: false
documentation: !string |-
// NAME converts 8 lowest vector element values to int32.
- go: ConvertToInt64x2
commutative: false
documentation: !string |-
// NAME converts 2 lowest vector element values to int64.
- go: ConvertToInt64x4
commutative: false
documentation: !string |-
// NAME converts 4 lowest vector element values to int64.
- go: ConvertToInt64x8
commutative: false
documentation: !string |-
// NAME converts 8 lowest vector element values to int64.
- go: ConvertToUint16x8
commutative: false
documentation: !string |-
@ -23,3 +60,19 @@
commutative: false
documentation: !string |-
// NAME converts 4 lowest vector element values to uint32.
- go: ConvertToUint32x8
commutative: false
documentation: !string |-
// NAME converts 8 lowest vector element values to uint32.
- go: ConvertToUint64x2
commutative: false
documentation: !string |-
// NAME converts 2 lowest vector element values to uint64.
- go: ConvertToUint64x4
commutative: false
documentation: !string |-
// NAME converts 4 lowest vector element values to uint64.
- go: ConvertToUint64x8
commutative: false
documentation: !string |-
// NAME converts 8 lowest vector element values to uint64.

View file

@ -1,6 +1,6 @@
!sum
# Float <-> Int conversions
# TODO: this right now only has Float32 -> Int32|Uint32, more to add.
# float32 -> int32
- go: ConvertToInt32
asm: "VCVTTPS2DQ"
in:
@ -12,6 +12,7 @@
go: $u
base: int
elemBits: 32
# float32 -> uint32
- go: ConvertToUint32
asm: "VCVTPS2UDQ"
in:
@ -22,8 +23,8 @@
base: uint
elemBits: 32
# Uint -> Uint widening conversions.
# TODO: this right now only has uint8 -> uint16 and uint16->uint32.
# Widening integer conversions.
# uint8 -> uint16
- go: ConvertToUint16
asm: "VPMOVZXBW"
in:
@ -36,7 +37,6 @@
base: uint
elemBits: 16
bits: 256
- go: ConvertToUint16
asm: "VPMOVZXBW"
in:
@ -49,7 +49,32 @@
base: uint
elemBits: 16
bits: 512
# int8 -> int16
- go: ConvertToInt16
asm: "VPMOVSXBW"
in:
- &i8x16
base: int
elemBits: 8
bits: 128
out:
- &i16x16
base: int
elemBits: 16
bits: 256
- go: ConvertToInt16
asm: "VPMOVSXBW"
in:
- &i8x32
base: int
elemBits: 8
bits: 256
out:
- &i16x32
base: int
elemBits: 16
bits: 512
# uint16->uint32
- go: ConvertToUint32
asm: "VPMOVZXWD"
in:
@ -62,7 +87,6 @@
base: uint
elemBits: 32
bits: 256
- go: ConvertToUint32
asm: "VPMOVZXWD"
in:
@ -72,21 +96,237 @@
base: uint
elemBits: 32
bits: 512
# int16->int32
- go: ConvertToInt32
asm: "VPMOVSXWD"
in:
- &i16x8
base: int
elemBits: 16
bits: 128
out:
- &i32x8
base: int
elemBits: 32
bits: 256
- go: ConvertToInt32
asm: "VPMOVSXWD"
in:
- *i16x16
out:
- &i32x16
base: int
elemBits: 32
bits: 512
# uint32 -> uint64
- go: ConvertToUint64
asm: "VPMOVZXDQ"
in:
- &u32x4
base: uint
elemBits: 32
bits: 128
out:
- &u64x4
base: uint
elemBits: 64
bits: 256
- go: ConvertToUint64
asm: "VPMOVZXDQ"
in:
- *u32x8
out:
- &u64x8
base: uint
elemBits: 64
bits: 512
# int32 -> int64
- go: ConvertToInt64
asm: "VPMOVSXDQ"
in:
- &i32x4
base: int
elemBits: 32
bits: 128
out:
- &i64x4
base: int
elemBits: 64
bits: 256
- go: ConvertToInt64
asm: "VPMOVSXDQ"
in:
- *i32x8
out:
- &i64x8
base: int
elemBits: 64
bits: 512
# uint16 -> uint64
- go: ConvertToUint64
asm: "VPMOVZXWQ"
in:
- *u16x8
out:
- *u64x8
# int16 -> int64
- go: ConvertToInt64
asm: "VPMOVSXWQ"
in:
- *i16x8
out:
- *i64x8
# uint8 -> uint32
- go: ConvertToUint32
asm: "VPMOVZXBD"
in:
- *u8x16
out:
- *u32x16
# int8 -> int32
- go: ConvertToInt32
asm: "VPMOVSXBD"
in:
- *i8x16
out:
- *i32x16
# Truncating conversions.
# TODO: this right now only has uint8->uint16 and uint16->uint32.
# uint8->uint16
- go: ConvertToUint16x8
asm: "VPMOVZXBW"
in:
- *u8x16
out:
- *u16x8
# int8->int16
- go: ConvertToInt16x8
asm: "VPMOVSXBW"
in:
- *i8x16
out:
- *i16x8
# uint16->uint32
- go: ConvertToUint32x4
asm: "VPMOVZXWD"
in:
- *u16x8
out:
- &u32x4
- *u32x4
# int16->int32
- go: ConvertToInt32x4
asm: "VPMOVSXWD"
in:
- *i16x8
out:
- *i32x4
# uint32 -> uint64
- go: ConvertToUint64x2
asm: "VPMOVZXDQ"
in:
- *u32x4
out:
- &u64x2
base: uint
elemBits: 32
elemBits: 64
bits: 128
# int32 -> int64
- go: ConvertToInt64x2
asm: "VPMOVSXDQ"
in:
- *i32x4
out:
- &i64x2
base: int
elemBits: 64
bits: 128
# uint16 -> uint64
- go: ConvertToUint64x2
asm: "VPMOVZXWQ"
in:
- *u16x8
out:
- *u64x2
- go: ConvertToUint64x4
asm: "VPMOVZXWQ"
in:
- *u16x8
out:
- *u64x4
# int16 -> int64
- go: ConvertToInt64x2
asm: "VPMOVSXWQ"
in:
- *i16x8
out:
- *i64x2
# NOTE(review): this entry takes *i16x8 and produces *i64x4 via VPMOVSXWQ
# (the signed, sign-extending form), yet its Go name is ConvertToUint64x4.
# Every other signed entry in this file is named ConvertToInt*, so this is
# probably a typo for ConvertToInt64x4 — confirm, then rename and regenerate.
- go: ConvertToUint64x4
asm: "VPMOVSXWQ"
in:
- *i16x8
out:
- *i64x4
# uint8 -> uint32
- go: ConvertToUint32x4
asm: "VPMOVZXBD"
in:
- *u8x16
out:
- *u32x4
- go: ConvertToUint32x8
asm: "VPMOVZXBD"
in:
- *u8x16
out:
- *u32x8
# int8 -> int32
- go: ConvertToInt32x4
asm: "VPMOVSXBD"
in:
- *i8x16
out:
- *i32x4
- go: ConvertToInt32x8
asm: "VPMOVSXBD"
in:
- *i8x16
out:
- *i32x8
# uint8 -> uint64
- go: ConvertToUint64x2
asm: "VPMOVZXBQ"
in:
- *u8x16
out:
- *u64x2
- go: ConvertToUint64x4
asm: "VPMOVZXBQ"
in:
- *u8x16
out:
- *u64x4
- go: ConvertToUint64x8
asm: "VPMOVZXBQ"
in:
- *u8x16
out:
- *u64x8
# int8 -> int64
- go: ConvertToInt64x2
asm: "VPMOVSXBQ"
in:
- *i8x16
out:
- *i64x2
- go: ConvertToInt64x4
asm: "VPMOVSXBQ"
in:
- *i8x16
out:
- *i64x4
- go: ConvertToInt64x8
asm: "VPMOVSXBQ"
in:
- *i8x16
out:
- *i64x8

View file

@ -1195,6 +1195,25 @@ func (x Uint64x4) Compress(mask Mask64x4) Uint64x4
// Asm: VPCOMPRESSQ, CPU Feature: AVX512
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
/* ConvertToInt16 */
// ConvertToInt16 converts element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX2
func (x Int8x16) ConvertToInt16() Int16x16
// ConvertToInt16 converts element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX512
func (x Int8x32) ConvertToInt16() Int16x32
/* ConvertToInt16x8 */
// ConvertToInt16x8 converts 8 lowest vector element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX
func (x Int8x16) ConvertToInt16x8() Int16x8
/* ConvertToInt32 */
// ConvertToInt32 converts element values to int32.
@ -1212,6 +1231,88 @@ func (x Float32x8) ConvertToInt32() Int32x8
// Asm: VCVTTPS2DQ, CPU Feature: AVX512
func (x Float32x16) ConvertToInt32() Int32x16
// ConvertToInt32 converts element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX512
func (x Int8x16) ConvertToInt32() Int32x16
// ConvertToInt32 converts element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX2
func (x Int16x8) ConvertToInt32() Int32x8
// ConvertToInt32 converts element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX512
func (x Int16x16) ConvertToInt32() Int32x16
/* ConvertToInt32x4 */
// ConvertToInt32x4 converts 4 lowest vector element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX
func (x Int8x16) ConvertToInt32x4() Int32x4
// ConvertToInt32x4 converts 4 lowest vector element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX
func (x Int16x8) ConvertToInt32x4() Int32x4
/* ConvertToInt32x8 */
// ConvertToInt32x8 converts 8 lowest vector element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX2
func (x Int8x16) ConvertToInt32x8() Int32x8
/* ConvertToInt64 */
// ConvertToInt64 converts element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX512
func (x Int16x8) ConvertToInt64() Int64x8
// ConvertToInt64 converts element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX2
func (x Int32x4) ConvertToInt64() Int64x4
// ConvertToInt64 converts element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX512
func (x Int32x8) ConvertToInt64() Int64x8
/* ConvertToInt64x2 */
// ConvertToInt64x2 converts 2 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX
func (x Int8x16) ConvertToInt64x2() Int64x2
// ConvertToInt64x2 converts 2 lowest vector element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX
func (x Int16x8) ConvertToInt64x2() Int64x2
// ConvertToInt64x2 converts 2 lowest vector element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX
func (x Int32x4) ConvertToInt64x2() Int64x2
/* ConvertToInt64x4 */
// ConvertToInt64x4 converts 4 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX2
func (x Int8x16) ConvertToInt64x4() Int64x4
/* ConvertToInt64x8 */
// ConvertToInt64x8 converts 8 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX512
func (x Int8x16) ConvertToInt64x8() Int64x8
/* ConvertToUint16 */
// ConvertToUint16 converts element values to uint16.
@ -1248,6 +1349,11 @@ func (x Float32x8) ConvertToUint32() Uint32x8
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
func (x Float32x16) ConvertToUint32() Uint32x16
// ConvertToUint32 converts element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX512
func (x Uint8x16) ConvertToUint32() Uint32x16
// ConvertToUint32 converts element values to uint32.
//
// Asm: VPMOVZXWD, CPU Feature: AVX2
@ -1260,11 +1366,81 @@ func (x Uint16x16) ConvertToUint32() Uint32x16
/* ConvertToUint32x4 */
// ConvertToUint32x4 converts 4 lowest vector element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX
func (x Uint8x16) ConvertToUint32x4() Uint32x4
// ConvertToUint32x4 converts 4 lowest vector element values to uint32.
//
// Asm: VPMOVZXWD, CPU Feature: AVX
func (x Uint16x8) ConvertToUint32x4() Uint32x4
/* ConvertToUint32x8 */
// ConvertToUint32x8 converts 8 lowest vector element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX2
func (x Uint8x16) ConvertToUint32x8() Uint32x8
/* ConvertToUint64 */
// ConvertToUint64 converts element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX512
func (x Uint16x8) ConvertToUint64() Uint64x8
// ConvertToUint64 converts element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX2
func (x Uint32x4) ConvertToUint64() Uint64x4
// ConvertToUint64 converts element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX512
func (x Uint32x8) ConvertToUint64() Uint64x8
/* ConvertToUint64x2 */
// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX
func (x Uint8x16) ConvertToUint64x2() Uint64x2
// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX
func (x Uint16x8) ConvertToUint64x2() Uint64x2
// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX
func (x Uint32x4) ConvertToUint64x2() Uint64x2
/* ConvertToUint64x4 */
// ConvertToUint64x4 converts 4 lowest vector element values to int64.
//
// NOTE(review): despite the "Uint" in its name, this method is declared on
// Int16x8 and returns Int64x4; the name looks like a typo for
// ConvertToInt64x4 — confirm against the generator input.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX2
func (x Int16x8) ConvertToUint64x4() Int64x4
// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX2
func (x Uint8x16) ConvertToUint64x4() Uint64x4
// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX2
func (x Uint16x8) ConvertToUint64x4() Uint64x4
/* ConvertToUint64x8 */
// ConvertToUint64x8 converts 8 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX512
func (x Uint8x16) ConvertToUint64x8() Uint64x8
/* CopySign */
// CopySign returns the product of the first operand with -1, 0, or 1,