diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 5fc85457cf0..8674866df3d 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -41,18 +41,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPBROADCASTW512, ssa.OpAMD64VPBROADCASTD512, ssa.OpAMD64VPBROADCASTQ512, + ssa.OpAMD64VPMOVSXBW256, + ssa.OpAMD64VPMOVSXBW512, + ssa.OpAMD64VPMOVSXBW128, ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ256, ssa.OpAMD64VCVTTPS2DQ512, + ssa.OpAMD64VPMOVSXBD512, + ssa.OpAMD64VPMOVSXWD256, + ssa.OpAMD64VPMOVSXWD512, + ssa.OpAMD64VPMOVSXBD128, + ssa.OpAMD64VPMOVSXWD128, + ssa.OpAMD64VPMOVSXBD256, + ssa.OpAMD64VPMOVSXWQ512, + ssa.OpAMD64VPMOVSXDQ256, + ssa.OpAMD64VPMOVSXDQ512, + ssa.OpAMD64VPMOVSXBQ128, + ssa.OpAMD64VPMOVSXWQ128, + ssa.OpAMD64VPMOVSXDQ128, + ssa.OpAMD64VPMOVSXBQ256, + ssa.OpAMD64VPMOVSXBQ512, ssa.OpAMD64VPMOVZXBW256, ssa.OpAMD64VPMOVZXBW512, ssa.OpAMD64VPMOVZXBW128, ssa.OpAMD64VCVTPS2UDQ128, ssa.OpAMD64VCVTPS2UDQ256, ssa.OpAMD64VCVTPS2UDQ512, + ssa.OpAMD64VPMOVZXBD512, ssa.OpAMD64VPMOVZXWD256, ssa.OpAMD64VPMOVZXWD512, + ssa.OpAMD64VPMOVZXBD128, ssa.OpAMD64VPMOVZXWD128, + ssa.OpAMD64VPMOVZXBD256, + ssa.OpAMD64VPMOVZXWQ512, + ssa.OpAMD64VPMOVZXDQ256, + ssa.OpAMD64VPMOVZXDQ512, + ssa.OpAMD64VPMOVZXBQ128, + ssa.OpAMD64VPMOVZXWQ128, + ssa.OpAMD64VPMOVZXDQ128, + ssa.OpAMD64VPMOVSXWQ256, + ssa.OpAMD64VPMOVZXBQ256, + ssa.OpAMD64VPMOVZXWQ256, + ssa.OpAMD64VPMOVZXBQ512, ssa.OpAMD64VPOPCNTB128, ssa.OpAMD64VPOPCNTB256, ssa.OpAMD64VPOPCNTB512, @@ -685,18 +715,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, + ssa.OpAMD64VPMOVSXBWMasked256, + ssa.OpAMD64VPMOVSXBWMasked512, + ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, ssa.OpAMD64VCVTTPS2DQMasked512, + ssa.OpAMD64VPMOVSXBDMasked512, + 
ssa.OpAMD64VPMOVSXWDMasked256, + ssa.OpAMD64VPMOVSXWDMasked512, + ssa.OpAMD64VPMOVSXBDMasked128, + ssa.OpAMD64VPMOVSXWDMasked128, + ssa.OpAMD64VPMOVSXBDMasked256, + ssa.OpAMD64VPMOVSXWQMasked512, + ssa.OpAMD64VPMOVSXDQMasked256, + ssa.OpAMD64VPMOVSXDQMasked512, + ssa.OpAMD64VPMOVSXBQMasked128, + ssa.OpAMD64VPMOVSXWQMasked128, + ssa.OpAMD64VPMOVSXDQMasked128, + ssa.OpAMD64VPMOVSXBQMasked256, + ssa.OpAMD64VPMOVSXBQMasked512, ssa.OpAMD64VPMOVZXBWMasked256, ssa.OpAMD64VPMOVZXBWMasked512, ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, ssa.OpAMD64VCVTPS2UDQMasked512, + ssa.OpAMD64VPMOVZXBDMasked512, ssa.OpAMD64VPMOVZXWDMasked256, ssa.OpAMD64VPMOVZXWDMasked512, + ssa.OpAMD64VPMOVZXBDMasked128, ssa.OpAMD64VPMOVZXWDMasked128, + ssa.OpAMD64VPMOVZXBDMasked256, + ssa.OpAMD64VPMOVZXWQMasked512, + ssa.OpAMD64VPMOVZXDQMasked256, + ssa.OpAMD64VPMOVZXDQMasked512, + ssa.OpAMD64VPMOVZXBQMasked128, + ssa.OpAMD64VPMOVZXWQMasked128, + ssa.OpAMD64VPMOVZXDQMasked128, + ssa.OpAMD64VPMOVSXWQMasked256, + ssa.OpAMD64VPMOVZXBQMasked256, + ssa.OpAMD64VPMOVZXWQMasked256, + ssa.OpAMD64VPMOVZXBQMasked512, ssa.OpAMD64VEXPANDPSMasked128, ssa.OpAMD64VEXPANDPSMasked256, ssa.OpAMD64VEXPANDPSMasked512, @@ -1307,18 +1367,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, + ssa.OpAMD64VPMOVSXBWMasked256, + ssa.OpAMD64VPMOVSXBWMasked512, + ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, ssa.OpAMD64VCVTTPS2DQMasked512, + ssa.OpAMD64VPMOVSXBDMasked512, + ssa.OpAMD64VPMOVSXWDMasked256, + ssa.OpAMD64VPMOVSXWDMasked512, + ssa.OpAMD64VPMOVSXBDMasked128, + ssa.OpAMD64VPMOVSXWDMasked128, + ssa.OpAMD64VPMOVSXBDMasked256, + ssa.OpAMD64VPMOVSXWQMasked512, + ssa.OpAMD64VPMOVSXDQMasked256, + ssa.OpAMD64VPMOVSXDQMasked512, + ssa.OpAMD64VPMOVSXBQMasked128, + ssa.OpAMD64VPMOVSXWQMasked128, + 
ssa.OpAMD64VPMOVSXDQMasked128, + ssa.OpAMD64VPMOVSXBQMasked256, + ssa.OpAMD64VPMOVSXBQMasked512, ssa.OpAMD64VPMOVZXBWMasked256, ssa.OpAMD64VPMOVZXBWMasked512, ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, ssa.OpAMD64VCVTPS2UDQMasked512, + ssa.OpAMD64VPMOVZXBDMasked512, ssa.OpAMD64VPMOVZXWDMasked256, ssa.OpAMD64VPMOVZXWDMasked512, + ssa.OpAMD64VPMOVZXBDMasked128, ssa.OpAMD64VPMOVZXWDMasked128, + ssa.OpAMD64VPMOVZXBDMasked256, + ssa.OpAMD64VPMOVZXWQMasked512, + ssa.OpAMD64VPMOVZXDQMasked256, + ssa.OpAMD64VPMOVZXDQMasked512, + ssa.OpAMD64VPMOVZXBQMasked128, + ssa.OpAMD64VPMOVZXWQMasked128, + ssa.OpAMD64VPMOVZXDQMasked128, + ssa.OpAMD64VPMOVSXWQMasked256, + ssa.OpAMD64VPMOVZXBQMasked256, + ssa.OpAMD64VPMOVZXWQMasked256, + ssa.OpAMD64VPMOVZXBQMasked512, ssa.OpAMD64VDIVPSMasked128, ssa.OpAMD64VDIVPSMasked256, ssa.OpAMD64VDIVPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index d7bab7b050f..303eec4bc01 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -211,18 +211,48 @@ (CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) (CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) (CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) +(ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...) +(ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...) +(ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...) (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...) (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...) (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...) +(ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...) +(ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...) +(ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...) +(ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) +(ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) 
+(ConvertToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...) +(ConvertToInt64Int16x8 ...) => (VPMOVSXWQ512 ...) +(ConvertToInt64Int32x4 ...) => (VPMOVSXDQ256 ...) +(ConvertToInt64Int32x8 ...) => (VPMOVSXDQ512 ...) +(ConvertToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...) +(ConvertToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...) +(ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) +(ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) +(ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) (ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) (ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...) (ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...) (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...) (ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...) (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...) +(ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...) (ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) (ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) +(ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) (ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) +(ConvertToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...) +(ConvertToUint64Uint16x8 ...) => (VPMOVZXWQ512 ...) +(ConvertToUint64Uint32x4 ...) => (VPMOVZXDQ256 ...) +(ConvertToUint64Uint32x8 ...) => (VPMOVZXDQ512 ...) +(ConvertToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...) +(ConvertToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...) +(ConvertToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...) +(ConvertToUint64x4Int16x8 ...) => (VPMOVSXWQ256 ...) +(ConvertToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...) +(ConvertToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...) +(ConvertToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...) (CopySignInt8x16 ...) => (VPSIGNB128 ...) (CopySignInt8x32 ...) => (VPSIGNB256 ...) (CopySignInt16x8 ...) => (VPSIGNW128 ...) 
@@ -1141,10 +1171,20 @@ (VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512 [a] x mask) (VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512 [a] x mask) (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask) +(VMOVDQU16Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask) (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512 x mask) +(VMOVDQU32Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask) +(VMOVDQU32Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask) +(VMOVDQU64Masked512 (VPMOVSXWQ512 x) mask) => (VPMOVSXWQMasked512 x mask) +(VMOVDQU64Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask) +(VMOVDQU64Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask) (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask) (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512 x mask) +(VMOVDQU32Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask) (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask) +(VMOVDQU64Masked512 (VPMOVZXWQ512 x) mask) => (VPMOVZXWQMasked512 x mask) +(VMOVDQU64Masked512 (VPMOVZXDQ512 x) mask) => (VPMOVZXDQMasked512 x mask) +(VMOVDQU64Masked512 (VPMOVZXBQ512 x) mask) => (VPMOVZXBQMasked512 x mask) (VMOVDQU32Masked512 (VDIVPS512 x y) mask) => (VDIVPSMasked512 x y mask) (VMOVDQU64Masked512 (VDIVPD512 x y) mask) => (VDIVPDMasked512 x y mask) (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) => (VPMADDWDMasked512 x y mask) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 7782b43cf5b..aa279a9f2a0 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -548,18 +548,78 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ:
"Vec128", resultInArg0: false}, {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXBD512", argLength: 1, reg: w11, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXBDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXBDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXBDMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXBQ128", argLength: 1, reg: v11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXBQ256", argLength: 1, reg: v11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXBQ512", argLength: 1, reg: w11, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXBQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXBQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXBQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXBW128", argLength: 1, reg: v11, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXBW256", argLength: 1, reg: v11, asm: "VPMOVSXBW", 
commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXBW512", argLength: 1, reg: w11, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXDQ128", argLength: 1, reg: v11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXDQ256", argLength: 1, reg: v11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXDQ512", argLength: 1, reg: w11, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXDQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXDQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXDQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXWD128", argLength: 1, reg: v11, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXWD256", argLength: 1, reg: v11, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXWD512", argLength: 1, reg: w11, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXWDMasked512", 
argLength: 2, reg: wkw, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXWQ128", argLength: 1, reg: v11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXWQ256", argLength: 1, reg: v11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXWQ512", argLength: 1, reg: w11, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXBD512", argLength: 1, reg: w11, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXBDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXBDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXBDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXBQ128", argLength: 1, reg: v11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXBQ256", argLength: 1, reg: v11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXBQ512", argLength: 1, reg: w11, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: false}, + 
{name: "VPMOVZXBQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXBQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXBQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMOVZXBW128", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVZXBW256", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXBW512", argLength: 1, reg: w11, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMOVZXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVZXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXDQ128", argLength: 1, reg: v11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXDQ256", argLength: 1, reg: v11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXDQ512", argLength: 1, reg: w11, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXDQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXDQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXDQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMOVZXWD128", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", 
resultInArg0: false}, {name: "VPMOVZXWD256", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXWD512", argLength: 1, reg: w11, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMOVZXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVZXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXWQ128", argLength: 1, reg: v11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXWQ256", argLength: 1, reg: v11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXWQ512", argLength: 1, reg: w11, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVZXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVZXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVZXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 4844d8fc0ce..4baad2b3127 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ 
b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -203,18 +203,48 @@ func simdGenericOps() []opData { {name: "CompressUint64x2", argLength: 2, commutative: false}, {name: "CompressUint64x4", argLength: 2, commutative: false}, {name: "CompressUint64x8", argLength: 2, commutative: false}, + {name: "ConvertToInt16Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int8x32", argLength: 1, commutative: false}, + {name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false}, + {name: "ConvertToInt32Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToInt32Int16x8", argLength: 1, commutative: false}, + {name: "ConvertToInt32Int16x16", argLength: 1, commutative: false}, + {name: "ConvertToInt32x4Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToInt32x4Int16x8", argLength: 1, commutative: false}, + {name: "ConvertToInt32x8Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToInt64Int16x8", argLength: 1, commutative: false}, + {name: "ConvertToInt64Int32x4", argLength: 1, commutative: false}, + {name: "ConvertToInt64Int32x8", argLength: 1, commutative: false}, + {name: "ConvertToInt64x2Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToInt64x2Int16x8", argLength: 1, commutative: false}, + {name: "ConvertToInt64x2Int32x4", argLength: 1, commutative: false}, + {name: "ConvertToInt64x4Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToInt64x8Int8x16", argLength: 1, commutative: false}, {name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false}, {name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false}, {name: 
"ConvertToUint32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false}, + {name: "ConvertToUint32Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false}, + {name: "ConvertToUint32x4Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint32x8Uint8x16", argLength: 1, commutative: false}, + {name: "ConvertToUint64Uint16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint64Uint32x4", argLength: 1, commutative: false}, + {name: "ConvertToUint64Uint32x8", argLength: 1, commutative: false}, + {name: "ConvertToUint64x2Uint8x16", argLength: 1, commutative: false}, + {name: "ConvertToUint64x2Uint16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint64x2Uint32x4", argLength: 1, commutative: false}, + {name: "ConvertToUint64x4Int16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint64x4Uint8x16", argLength: 1, commutative: false}, + {name: "ConvertToUint64x4Uint16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint64x8Uint8x16", argLength: 1, commutative: false}, {name: "CopySignInt8x16", argLength: 2, commutative: false}, {name: "CopySignInt8x32", argLength: 2, commutative: false}, {name: "CopySignInt16x8", argLength: 2, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index c5402c6f17f..a45d01b5bb9 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1771,18 +1771,78 @@ const ( OpAMD64VPMINUWMasked128 OpAMD64VPMINUWMasked256 OpAMD64VPMINUWMasked512 + OpAMD64VPMOVSXBD128 + OpAMD64VPMOVSXBD256 + OpAMD64VPMOVSXBD512 + OpAMD64VPMOVSXBDMasked128 + OpAMD64VPMOVSXBDMasked256 + OpAMD64VPMOVSXBDMasked512 + OpAMD64VPMOVSXBQ128 + OpAMD64VPMOVSXBQ256 + 
OpAMD64VPMOVSXBQ512 + OpAMD64VPMOVSXBQMasked128 + OpAMD64VPMOVSXBQMasked256 + OpAMD64VPMOVSXBQMasked512 + OpAMD64VPMOVSXBW128 + OpAMD64VPMOVSXBW256 + OpAMD64VPMOVSXBW512 + OpAMD64VPMOVSXBWMasked128 + OpAMD64VPMOVSXBWMasked256 + OpAMD64VPMOVSXBWMasked512 + OpAMD64VPMOVSXDQ128 + OpAMD64VPMOVSXDQ256 + OpAMD64VPMOVSXDQ512 + OpAMD64VPMOVSXDQMasked128 + OpAMD64VPMOVSXDQMasked256 + OpAMD64VPMOVSXDQMasked512 + OpAMD64VPMOVSXWD128 + OpAMD64VPMOVSXWD256 + OpAMD64VPMOVSXWD512 + OpAMD64VPMOVSXWDMasked128 + OpAMD64VPMOVSXWDMasked256 + OpAMD64VPMOVSXWDMasked512 + OpAMD64VPMOVSXWQ128 + OpAMD64VPMOVSXWQ256 + OpAMD64VPMOVSXWQ512 + OpAMD64VPMOVSXWQMasked128 + OpAMD64VPMOVSXWQMasked256 + OpAMD64VPMOVSXWQMasked512 + OpAMD64VPMOVZXBD128 + OpAMD64VPMOVZXBD256 + OpAMD64VPMOVZXBD512 + OpAMD64VPMOVZXBDMasked128 + OpAMD64VPMOVZXBDMasked256 + OpAMD64VPMOVZXBDMasked512 + OpAMD64VPMOVZXBQ128 + OpAMD64VPMOVZXBQ256 + OpAMD64VPMOVZXBQ512 + OpAMD64VPMOVZXBQMasked128 + OpAMD64VPMOVZXBQMasked256 + OpAMD64VPMOVZXBQMasked512 OpAMD64VPMOVZXBW128 OpAMD64VPMOVZXBW256 OpAMD64VPMOVZXBW512 OpAMD64VPMOVZXBWMasked128 OpAMD64VPMOVZXBWMasked256 OpAMD64VPMOVZXBWMasked512 + OpAMD64VPMOVZXDQ128 + OpAMD64VPMOVZXDQ256 + OpAMD64VPMOVZXDQ512 + OpAMD64VPMOVZXDQMasked128 + OpAMD64VPMOVZXDQMasked256 + OpAMD64VPMOVZXDQMasked512 OpAMD64VPMOVZXWD128 OpAMD64VPMOVZXWD256 OpAMD64VPMOVZXWD512 OpAMD64VPMOVZXWDMasked128 OpAMD64VPMOVZXWDMasked256 OpAMD64VPMOVZXWDMasked512 + OpAMD64VPMOVZXWQ128 + OpAMD64VPMOVZXWQ256 + OpAMD64VPMOVZXWQ512 + OpAMD64VPMOVZXWQMasked128 + OpAMD64VPMOVZXWQMasked256 + OpAMD64VPMOVZXWQMasked512 OpAMD64VPMULDQ128 OpAMD64VPMULDQ256 OpAMD64VPMULHUW128 @@ -4856,18 +4916,48 @@ const ( OpCompressUint64x2 OpCompressUint64x4 OpCompressUint64x8 + OpConvertToInt16Int8x16 + OpConvertToInt16Int8x32 + OpConvertToInt16x8Int8x16 OpConvertToInt32Float32x4 OpConvertToInt32Float32x8 OpConvertToInt32Float32x16 + OpConvertToInt32Int8x16 + OpConvertToInt32Int16x8 + OpConvertToInt32Int16x16 + OpConvertToInt32x4Int8x16 + 
OpConvertToInt32x4Int16x8 + OpConvertToInt32x8Int8x16 + OpConvertToInt64Int16x8 + OpConvertToInt64Int32x4 + OpConvertToInt64Int32x8 + OpConvertToInt64x2Int8x16 + OpConvertToInt64x2Int16x8 + OpConvertToInt64x2Int32x4 + OpConvertToInt64x4Int8x16 + OpConvertToInt64x8Int8x16 OpConvertToUint16Uint8x16 OpConvertToUint16Uint8x32 OpConvertToUint16x8Uint8x16 OpConvertToUint32Float32x4 OpConvertToUint32Float32x8 OpConvertToUint32Float32x16 + OpConvertToUint32Uint8x16 OpConvertToUint32Uint16x8 OpConvertToUint32Uint16x16 + OpConvertToUint32x4Uint8x16 OpConvertToUint32x4Uint16x8 + OpConvertToUint32x8Uint8x16 + OpConvertToUint64Uint16x8 + OpConvertToUint64Uint32x4 + OpConvertToUint64Uint32x8 + OpConvertToUint64x2Uint8x16 + OpConvertToUint64x2Uint16x8 + OpConvertToUint64x2Uint32x4 + OpConvertToUint64x4Int16x8 + OpConvertToUint64x4Uint8x16 + OpConvertToUint64x4Uint16x8 + OpConvertToUint64x8Uint8x16 OpCopySignInt8x16 OpCopySignInt8x32 OpCopySignInt16x8 @@ -26948,6 +27038,654 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMOVSXBD128", + argLen: 1, + asm: x86.AVPMOVSXBD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBD256", + argLen: 1, + asm: x86.AVPMOVSXBD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBD512", + argLen: 1, + asm: x86.AVPMOVSXBD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 
X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSXBDMasked128", + argLen: 2, + asm: x86.AVPMOVSXBD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBDMasked256", + argLen: 2, + asm: x86.AVPMOVSXBD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBDMasked512", + argLen: 2, + asm: x86.AVPMOVSXBD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBQ128", + argLen: 1, + asm: x86.AVPMOVSXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBQ256", + argLen: 1, + asm: x86.AVPMOVSXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBQ512", + argLen: 1, + asm: x86.AVPMOVSXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 
X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSXBQMasked128", + argLen: 2, + asm: x86.AVPMOVSXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBQMasked256", + argLen: 2, + asm: x86.AVPMOVSXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBQMasked512", + argLen: 2, + asm: x86.AVPMOVSXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBW128", + argLen: 1, + asm: x86.AVPMOVSXBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBW256", + argLen: 1, + asm: x86.AVPMOVSXBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBW512", + argLen: 1, + asm: x86.AVPMOVSXBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSXBWMasked128", + argLen: 2, + asm: x86.AVPMOVSXBW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBWMasked256", + argLen: 2, + asm: x86.AVPMOVSXBW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXBWMasked512", + argLen: 2, + asm: x86.AVPMOVSXBW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXDQ128", + argLen: 1, + asm: x86.AVPMOVSXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXDQ256", + argLen: 1, + asm: x86.AVPMOVSXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXDQ512", + argLen: 1, + asm: x86.AVPMOVSXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 
X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSXDQMasked128", + argLen: 2, + asm: x86.AVPMOVSXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXDQMasked256", + argLen: 2, + asm: x86.AVPMOVSXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXDQMasked512", + argLen: 2, + asm: x86.AVPMOVSXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWD128", + argLen: 1, + asm: x86.AVPMOVSXWD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWD256", + argLen: 1, + asm: x86.AVPMOVSXWD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWD512", + argLen: 1, + asm: x86.AVPMOVSXWD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSXWDMasked128", + argLen: 2, + asm: x86.AVPMOVSXWD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWDMasked256", + argLen: 2, + asm: x86.AVPMOVSXWD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWDMasked512", + argLen: 2, + asm: x86.AVPMOVSXWD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWQ128", + argLen: 1, + asm: x86.AVPMOVSXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWQ256", + argLen: 1, + asm: x86.AVPMOVSXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWQ512", + argLen: 1, + asm: x86.AVPMOVSXWQ, + reg: regInfo{ + inputs: 
[]inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSXWQMasked128", + argLen: 2, + asm: x86.AVPMOVSXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWQMasked256", + argLen: 2, + asm: x86.AVPMOVSXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVSXWQMasked512", + argLen: 2, + asm: x86.AVPMOVSXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBD128", + argLen: 1, + asm: x86.AVPMOVZXBD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBD256", + argLen: 1, + asm: x86.AVPMOVZXBD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBD512", + 
argLen: 1, + asm: x86.AVPMOVZXBD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVZXBDMasked128", + argLen: 2, + asm: x86.AVPMOVZXBD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBDMasked256", + argLen: 2, + asm: x86.AVPMOVZXBD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBDMasked512", + argLen: 2, + asm: x86.AVPMOVZXBD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBQ128", + argLen: 1, + asm: x86.AVPMOVZXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBQ256", + argLen: 1, + asm: x86.AVPMOVZXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBQ512", + argLen: 1, + asm: x86.AVPMOVZXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVZXBQMasked128", + argLen: 2, + asm: x86.AVPMOVZXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBQMasked256", + argLen: 2, + asm: x86.AVPMOVZXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXBQMasked512", + argLen: 2, + asm: x86.AVPMOVZXBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMOVZXBW128", argLen: 1, @@ -27029,6 +27767,87 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMOVZXDQ128", + argLen: 1, + asm: x86.AVPMOVZXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXDQ256", + argLen: 1, + asm: x86.AVPMOVZXDQ, + reg: regInfo{ + inputs: []inputInfo{ 
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXDQ512", + argLen: 1, + asm: x86.AVPMOVZXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVZXDQMasked128", + argLen: 2, + asm: x86.AVPMOVZXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXDQMasked256", + argLen: 2, + asm: x86.AVPMOVZXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXDQMasked512", + argLen: 2, + asm: x86.AVPMOVZXDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMOVZXWD128", argLen: 1, @@ -27110,6 +27929,87 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMOVZXWQ128", + argLen: 1, + asm: x86.AVPMOVZXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 
X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWQ256", + argLen: 1, + asm: x86.AVPMOVZXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWQ512", + argLen: 1, + asm: x86.AVPMOVZXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVZXWQMasked128", + argLen: 2, + asm: x86.AVPMOVZXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWQMasked256", + argLen: 2, + asm: x86.AVPMOVZXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVZXWQMasked512", + argLen: 2, + asm: x86.AVPMOVZXWQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMULDQ128", argLen: 2, @@ -64323,6 +65223,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ConvertToInt16Int8x16", + argLen: 1, + 
generic: true, + }, + { + name: "ConvertToInt16Int8x32", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt16x8Int8x16", + argLen: 1, + generic: true, + }, { name: "ConvertToInt32Float32x4", argLen: 1, @@ -64338,6 +65253,76 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToInt32Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32Int16x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32x4Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32x4Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32x8Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64Int32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64Int32x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64x2Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64x2Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64x2Int32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64x4Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt64x8Int8x16", + argLen: 1, + generic: true, + }, { name: "ConvertToUint16Uint8x16", argLen: 1, @@ -64368,6 +65353,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint32Uint8x16", + argLen: 1, + generic: true, + }, { name: "ConvertToUint32Uint16x8", argLen: 1, @@ -64378,11 +65368,71 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint32x4Uint8x16", + argLen: 1, + generic: true, + }, { name: "ConvertToUint32x4Uint16x8", argLen: 1, generic: true, }, + { + name: "ConvertToUint32x8Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64Uint32x4", + argLen: 
1, + generic: true, + }, + { + name: "ConvertToUint64Uint32x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x2Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x2Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x2Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x4Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x4Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x4Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint64x8Uint8x16", + argLen: 1, + generic: true, + }, { name: "CopySignInt8x16", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 70c773bc1cb..2e17c845086 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1370,6 +1370,15 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpConstBool(v) case OpConstNil: return rewriteValueAMD64_OpConstNil(v) + case OpConvertToInt16Int8x16: + v.Op = OpAMD64VPMOVSXBW256 + return true + case OpConvertToInt16Int8x32: + v.Op = OpAMD64VPMOVSXBW512 + return true + case OpConvertToInt16x8Int8x16: + v.Op = OpAMD64VPMOVSXBW128 + return true case OpConvertToInt32Float32x16: v.Op = OpAMD64VCVTTPS2DQ512 return true @@ -1379,6 +1388,48 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToInt32Float32x8: v.Op = OpAMD64VCVTTPS2DQ256 return true + case OpConvertToInt32Int16x16: + v.Op = OpAMD64VPMOVSXWD512 + return true + case OpConvertToInt32Int16x8: + v.Op = OpAMD64VPMOVSXWD256 + return true + case OpConvertToInt32Int8x16: + v.Op = OpAMD64VPMOVSXBD512 + return true + case OpConvertToInt32x4Int16x8: + v.Op = OpAMD64VPMOVSXWD128 + return true + case OpConvertToInt32x4Int8x16: + v.Op = OpAMD64VPMOVSXBD128 + return true + case OpConvertToInt32x8Int8x16: + v.Op = OpAMD64VPMOVSXBD256 + return true + case OpConvertToInt64Int16x8: + 
v.Op = OpAMD64VPMOVSXWQ512 + return true + case OpConvertToInt64Int32x4: + v.Op = OpAMD64VPMOVSXDQ256 + return true + case OpConvertToInt64Int32x8: + v.Op = OpAMD64VPMOVSXDQ512 + return true + case OpConvertToInt64x2Int16x8: + v.Op = OpAMD64VPMOVSXWQ128 + return true + case OpConvertToInt64x2Int32x4: + v.Op = OpAMD64VPMOVSXDQ128 + return true + case OpConvertToInt64x2Int8x16: + v.Op = OpAMD64VPMOVSXBQ128 + return true + case OpConvertToInt64x4Int8x16: + v.Op = OpAMD64VPMOVSXBQ256 + return true + case OpConvertToInt64x8Int8x16: + v.Op = OpAMD64VPMOVSXBQ512 + return true case OpConvertToUint16Uint8x16: v.Op = OpAMD64VPMOVZXBW256 return true @@ -1403,9 +1454,48 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToUint32Uint16x8: v.Op = OpAMD64VPMOVZXWD256 return true + case OpConvertToUint32Uint8x16: + v.Op = OpAMD64VPMOVZXBD512 + return true case OpConvertToUint32x4Uint16x8: v.Op = OpAMD64VPMOVZXWD128 return true + case OpConvertToUint32x4Uint8x16: + v.Op = OpAMD64VPMOVZXBD128 + return true + case OpConvertToUint32x8Uint8x16: + v.Op = OpAMD64VPMOVZXBD256 + return true + case OpConvertToUint64Uint16x8: + v.Op = OpAMD64VPMOVZXWQ512 + return true + case OpConvertToUint64Uint32x4: + v.Op = OpAMD64VPMOVZXDQ256 + return true + case OpConvertToUint64Uint32x8: + v.Op = OpAMD64VPMOVZXDQ512 + return true + case OpConvertToUint64x2Uint16x8: + v.Op = OpAMD64VPMOVZXWQ128 + return true + case OpConvertToUint64x2Uint32x4: + v.Op = OpAMD64VPMOVZXDQ128 + return true + case OpConvertToUint64x2Uint8x16: + v.Op = OpAMD64VPMOVZXBQ128 + return true + case OpConvertToUint64x4Int16x8: + v.Op = OpAMD64VPMOVSXWQ256 + return true + case OpConvertToUint64x4Uint16x8: + v.Op = OpAMD64VPMOVZXWQ256 + return true + case OpConvertToUint64x4Uint8x16: + v.Op = OpAMD64VPMOVZXBQ256 + return true + case OpConvertToUint64x8Uint8x16: + v.Op = OpAMD64VPMOVZXBQ512 + return true case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 return true @@ -26103,6 +26193,30 @@ func 
rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) + // result: (VPMOVSXWDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXWD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXWDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) + // result: (VPMOVSXWQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXWQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXWQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) // result: (VPMOVZXWDMasked512 x mask) for { @@ -26115,6 +26229,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) + // result: (VPMOVZXWQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXWQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) // result: (VPMADDWDMasked512 x y mask) for { @@ -26677,6 +26803,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) + // result: (VPMOVSXDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) // result: (VCVTPS2UDQMasked512 x mask) for { @@ -26689,6 +26827,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) + // result: (VPMOVZXDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPMOVZXDQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask) // result: (VDIVPSMasked512 x y mask) for { @@ -28007,6 +28157,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) + // result: (VPMOVSXBWMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBW512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBWMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) + // result: (VPMOVSXBDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) + // result: (VPMOVSXBQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) // result: (VPMOVZXBWMasked512 x mask) for { @@ -28019,6 +28205,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) + // result: (VPMOVZXBDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) + // result: (VPMOVZXBQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) // result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask) for { diff --git 
a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 676cfa9032f..731b9afecb1 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -223,18 +223,48 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x32.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt32x8", opLen1(ssa.OpConvertToInt32x8Int8x16, 
types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int16x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt64x4", opLen1(ssa.OpConvertToInt64x4Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ConvertToInt64x8", opLen1(ssa.OpConvertToInt64x8Int8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64) + 
addF(simdPackage, "Uint8x16.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint32x8", opLen1(ssa.OpConvertToUint32x8Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint16x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ConvertToUint64x8", opLen1(ssa.OpConvertToUint64x8Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/_gen/simdgen/ops/Converts/categories.yaml index c2141b56845..a2508906c38 100644 --- 
a/src/simd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/categories.yaml @@ -1,20 +1,57 @@ !sum # Non-truncating conversions -# Could be widening int<->int or uint<->uint conversions or float<->int|uint conversions. +# int<->int or uint<->uint widening or float<->int|uint conversions. +- go: ConvertToInt16 + commutative: false + documentation: !string |- + // NAME converts element values to int16. - go: ConvertToInt32 commutative: false documentation: !string |- // NAME converts element values to int32. -- go: ConvertToUint32 +- go: ConvertToInt64 commutative: false documentation: !string |- - // NAME converts element values to uint32. + // NAME converts element values to int64. - go: ConvertToUint16 commutative: false documentation: !string |- // NAME converts element values to uint16. +- go: ConvertToUint32 + commutative: false + documentation: !string |- + // NAME converts element values to uint32. +- go: ConvertToUint64 + commutative: false + documentation: !string |- + // NAME converts element values to uint64. -# Truncating conversions, int<->int or uint<->uint. +# Truncating conversions +# int<->int or uint<->uint widening conversions. +- go: ConvertToInt16x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to int16. +- go: ConvertToInt32x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to int32. +- go: ConvertToInt32x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to int32. +- go: ConvertToInt64x2 + commutative: false + documentation: !string |- + // NAME converts 2 lowest vector element values to int64. +- go: ConvertToInt64x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to int64. +- go: ConvertToInt64x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to int64. 
- go: ConvertToUint16x8 commutative: false documentation: !string |- @@ -23,3 +60,19 @@ commutative: false documentation: !string |- // NAME converts 4 lowest vector element values to uint32. +- go: ConvertToUint32x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to uint32. +- go: ConvertToUint64x2 + commutative: false + documentation: !string |- + // NAME converts 2 lowest vector element values to uint64. +- go: ConvertToUint64x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to uint64. +- go: ConvertToUint64x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to uint64. \ No newline at end of file diff --git a/src/simd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/_gen/simdgen/ops/Converts/go.yaml index 56dea4ae050..453050c323c 100644 --- a/src/simd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/go.yaml @@ -1,6 +1,6 @@ !sum # Float <-> Int conversions -# TODO: this right now only has Float32 -> Int32|Uint32, more to add. +# float32 -> int32 - go: ConvertToInt32 asm: "VCVTTPS2DQ" in: @@ -12,6 +12,7 @@ go: $u base: int elemBits: 32 +# float32 -> uint32 - go: ConvertToUint32 asm: "VCVTPS2UDQ" in: @@ -22,8 +23,8 @@ base: uint elemBits: 32 -# Uint -> Uint widening conversions. -# TODO: this right now only has uint8 -> uint16 and uint16->uint32. +# Widening integer conversions. 
+# uint8 -> uint16 - go: ConvertToUint16 asm: "VPMOVZXBW" in: @@ -36,7 +37,6 @@ base: uint elemBits: 16 bits: 256 - - go: ConvertToUint16 asm: "VPMOVZXBW" in: @@ -49,7 +49,32 @@ base: uint elemBits: 16 bits: 512 - +# int8 -> int16 +- go: ConvertToInt16 + asm: "VPMOVSXBW" + in: + - &i8x16 + base: int + elemBits: 8 + bits: 128 + out: + - &i16x16 + base: int + elemBits: 16 + bits: 256 +- go: ConvertToInt16 + asm: "VPMOVSXBW" + in: + - &i8x32 + base: int + elemBits: 8 + bits: 256 + out: + - &i16x32 + base: int + elemBits: 16 + bits: 512 +# uint16->uint32 - go: ConvertToUint32 asm: "VPMOVZXWD" in: @@ -62,7 +87,6 @@ base: uint elemBits: 32 bits: 256 - - go: ConvertToUint32 asm: "VPMOVZXWD" in: @@ -72,21 +96,237 @@ base: uint elemBits: 32 bits: 512 +# int16->int32 +- go: ConvertToInt32 + asm: "VPMOVSXWD" + in: + - &i16x8 + base: int + elemBits: 16 + bits: 128 + out: + - &i32x8 + base: int + elemBits: 32 + bits: 256 +- go: ConvertToInt32 + asm: "VPMOVSXWD" + in: + - *i16x16 + out: + - &i32x16 + base: int + elemBits: 32 + bits: 512 +# uint32 -> uint64 +- go: ConvertToUint64 + asm: "VPMOVZXDQ" + in: + - &u32x4 + base: uint + elemBits: 32 + bits: 128 + out: + - &u64x4 + base: uint + elemBits: 64 + bits: 256 +- go: ConvertToUint64 + asm: "VPMOVZXDQ" + in: + - *u32x8 + out: + - &u64x8 + base: uint + elemBits: 64 + bits: 512 +# int32 -> int64 +- go: ConvertToInt64 + asm: "VPMOVSXDQ" + in: + - &i32x4 + base: int + elemBits: 32 + bits: 128 + out: + - &i64x4 + base: int + elemBits: 64 + bits: 256 +- go: ConvertToInt64 + asm: "VPMOVSXDQ" + in: + - *i32x8 + out: + - &i64x8 + base: int + elemBits: 64 + bits: 512 +# uint16 -> uint64 +- go: ConvertToUint64 + asm: "VPMOVZXWQ" + in: + - *u16x8 + out: + - *u64x8 +# int16 -> int64 +- go: ConvertToInt64 + asm: "VPMOVSXWQ" + in: + - *i16x8 + out: + - *i64x8 +# uint8 -> uint32 +- go: ConvertToUint32 + asm: "VPMOVZXBD" + in: + - *u8x16 + out: + - *u32x16 +# int8 -> int32 +- go: ConvertToInt32 + asm: "VPMOVSXBD" + in: + - *i8x16 + out: + - 
*i32x16 # Truncating conversions. -# TODO: this right now only has uint8->uint16 and uint16->uint32. +# uint8->uint16 - go: ConvertToUint16x8 asm: "VPMOVZXBW" in: - *u8x16 out: - *u16x8 +# int8->int16 +- go: ConvertToInt16x8 + asm: "VPMOVSXBW" + in: + - *i8x16 + out: + - *i16x8 +# uint16->uint32 - go: ConvertToUint32x4 asm: "VPMOVZXWD" in: - *u16x8 out: - - &u32x4 + - *u32x4 +# int16->int32 +- go: ConvertToInt32x4 + asm: "VPMOVSXWD" + in: + - *i16x8 + out: + - *i32x4 +# uint32 -> uint64 +- go: ConvertToUint64x2 + asm: "VPMOVZXDQ" + in: + - *u32x4 + out: + - &u64x2 base: uint - elemBits: 32 - bits: 128 \ No newline at end of file + elemBits: 64 + bits: 128 +# int32 -> int64 +- go: ConvertToInt64x2 + asm: "VPMOVSXDQ" + in: + - *i32x4 + out: + - &i64x2 + base: int + elemBits: 64 + bits: 128 +# uint16 -> uint64 +- go: ConvertToUint64x2 + asm: "VPMOVZXWQ" + in: + - *u16x8 + out: + - *u64x2 +- go: ConvertToUint64x4 + asm: "VPMOVZXWQ" + in: + - *u16x8 + out: + - *u64x4 +# int16 -> int64 +- go: ConvertToInt64x2 + asm: "VPMOVSXWQ" + in: + - *i16x8 + out: + - *i64x2 +- go: ConvertToInt64x4 + asm: "VPMOVSXWQ" + in: + - *i16x8 + out: + - *i64x4 +# uint8 -> uint32 +- go: ConvertToUint32x4 + asm: "VPMOVZXBD" + in: + - *u8x16 + out: + - *u32x4 +- go: ConvertToUint32x8 + asm: "VPMOVZXBD" + in: + - *u8x16 + out: + - *u32x8 +# int8 -> int32 +- go: ConvertToInt32x4 + asm: "VPMOVSXBD" + in: + - *i8x16 + out: + - *i32x4 +- go: ConvertToInt32x8 + asm: "VPMOVSXBD" + in: + - *i8x16 + out: + - *i32x8 +# uint8 -> uint64 +- go: ConvertToUint64x2 + asm: "VPMOVZXBQ" + in: + - *u8x16 + out: + - *u64x2 +- go: ConvertToUint64x4 + asm: "VPMOVZXBQ" + in: + - *u8x16 + out: + - *u64x4 +- go: ConvertToUint64x8 + asm: "VPMOVZXBQ" + in: + - *u8x16 + out: + - *u64x8 +# int8 -> int64 +- go: ConvertToInt64x2 + asm: "VPMOVSXBQ" + in: + - *i8x16 + out: + - *i64x2 +- go: ConvertToInt64x4 + asm: "VPMOVSXBQ" + in: + - *i8x16 + out: +
- *i64x8 \ No newline at end of file diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 4cfebb3a772..418ae229272 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -1195,6 +1195,25 @@ func (x Uint64x4) Compress(mask Mask64x4) Uint64x4 // Asm: VPCOMPRESSQ, CPU Feature: AVX512 func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 +/* ConvertToInt16 */ + +// ConvertToInt16 converts element values to int16. +// +// Asm: VPMOVSXBW, CPU Feature: AVX2 +func (x Int8x16) ConvertToInt16() Int16x16 + +// ConvertToInt16 converts element values to int16. +// +// Asm: VPMOVSXBW, CPU Feature: AVX512 +func (x Int8x32) ConvertToInt16() Int16x32 + +/* ConvertToInt16x8 */ + +// ConvertToInt16x8 converts 8 lowest vector element values to int16. +// +// Asm: VPMOVSXBW, CPU Feature: AVX +func (x Int8x16) ConvertToInt16x8() Int16x8 + /* ConvertToInt32 */ // ConvertToInt32 converts element values to int32. @@ -1212,6 +1231,88 @@ func (x Float32x8) ConvertToInt32() Int32x8 // Asm: VCVTTPS2DQ, CPU Feature: AVX512 func (x Float32x16) ConvertToInt32() Int32x16 +// ConvertToInt32 converts element values to int32. +// +// Asm: VPMOVSXBD, CPU Feature: AVX512 +func (x Int8x16) ConvertToInt32() Int32x16 + +// ConvertToInt32 converts element values to int32. +// +// Asm: VPMOVSXWD, CPU Feature: AVX2 +func (x Int16x8) ConvertToInt32() Int32x8 + +// ConvertToInt32 converts element values to int32. +// +// Asm: VPMOVSXWD, CPU Feature: AVX512 +func (x Int16x16) ConvertToInt32() Int32x16 + +/* ConvertToInt32x4 */ + +// ConvertToInt32x4 converts 4 lowest vector element values to int32. +// +// Asm: VPMOVSXBD, CPU Feature: AVX +func (x Int8x16) ConvertToInt32x4() Int32x4 + +// ConvertToInt32x4 converts 4 lowest vector element values to int32. +// +// Asm: VPMOVSXWD, CPU Feature: AVX +func (x Int16x8) ConvertToInt32x4() Int32x4 + +/* ConvertToInt32x8 */ + +// ConvertToInt32x8 converts 8 lowest vector element values to int32. 
+// +// Asm: VPMOVSXBD, CPU Feature: AVX2 +func (x Int8x16) ConvertToInt32x8() Int32x8 + +/* ConvertToInt64 */ + +// ConvertToInt64 converts element values to int64. +// +// Asm: VPMOVSXWQ, CPU Feature: AVX512 +func (x Int16x8) ConvertToInt64() Int64x8 + +// ConvertToInt64 converts element values to int64. +// +// Asm: VPMOVSXDQ, CPU Feature: AVX2 +func (x Int32x4) ConvertToInt64() Int64x4 + +// ConvertToInt64 converts element values to int64. +// +// Asm: VPMOVSXDQ, CPU Feature: AVX512 +func (x Int32x8) ConvertToInt64() Int64x8 + +/* ConvertToInt64x2 */ + +// ConvertToInt64x2 converts 2 lowest vector element values to int64. +// +// Asm: VPMOVSXBQ, CPU Feature: AVX +func (x Int8x16) ConvertToInt64x2() Int64x2 + +// ConvertToInt64x2 converts 2 lowest vector element values to int64. +// +// Asm: VPMOVSXWQ, CPU Feature: AVX +func (x Int16x8) ConvertToInt64x2() Int64x2 + +// ConvertToInt64x2 converts 2 lowest vector element values to int64. +// +// Asm: VPMOVSXDQ, CPU Feature: AVX +func (x Int32x4) ConvertToInt64x2() Int64x2 + +/* ConvertToInt64x4 */ + +// ConvertToInt64x4 converts 4 lowest vector element values to int64. +// +// Asm: VPMOVSXBQ, CPU Feature: AVX2 +func (x Int8x16) ConvertToInt64x4() Int64x4 + +/* ConvertToInt64x8 */ + +// ConvertToInt64x8 converts 8 lowest vector element values to int64. +// +// Asm: VPMOVSXBQ, CPU Feature: AVX512 +func (x Int8x16) ConvertToInt64x8() Int64x8 + /* ConvertToUint16 */ // ConvertToUint16 converts element values to uint16. @@ -1248,6 +1349,11 @@ func (x Float32x8) ConvertToUint32() Uint32x8 // Asm: VCVTPS2UDQ, CPU Feature: AVX512 func (x Float32x16) ConvertToUint32() Uint32x16 +// ConvertToUint32 converts element values to uint32. +// +// Asm: VPMOVZXBD, CPU Feature: AVX512 +func (x Uint8x16) ConvertToUint32() Uint32x16 + // ConvertToUint32 converts element values to uint32. 
// // Asm: VPMOVZXWD, CPU Feature: AVX2 @@ -1260,11 +1366,81 @@ func (x Uint16x16) ConvertToUint32() Uint32x16 /* ConvertToUint32x4 */ +// ConvertToUint32x4 converts 4 lowest vector element values to uint32. +// +// Asm: VPMOVZXBD, CPU Feature: AVX +func (x Uint8x16) ConvertToUint32x4() Uint32x4 + // ConvertToUint32x4 converts 4 lowest vector element values to uint32. // // Asm: VPMOVZXWD, CPU Feature: AVX func (x Uint16x8) ConvertToUint32x4() Uint32x4 +/* ConvertToUint32x8 */ + +// ConvertToUint32x8 converts 8 lowest vector element values to uint32. +// +// Asm: VPMOVZXBD, CPU Feature: AVX2 +func (x Uint8x16) ConvertToUint32x8() Uint32x8 + +/* ConvertToUint64 */ + +// ConvertToUint64 converts element values to uint64. +// +// Asm: VPMOVZXWQ, CPU Feature: AVX512 +func (x Uint16x8) ConvertToUint64() Uint64x8 + +// ConvertToUint64 converts element values to uint64. +// +// Asm: VPMOVZXDQ, CPU Feature: AVX2 +func (x Uint32x4) ConvertToUint64() Uint64x4 + +// ConvertToUint64 converts element values to uint64. +// +// Asm: VPMOVZXDQ, CPU Feature: AVX512 +func (x Uint32x8) ConvertToUint64() Uint64x8 + +/* ConvertToUint64x2 */ + +// ConvertToUint64x2 converts 2 lowest vector element values to uint64. +// +// Asm: VPMOVZXBQ, CPU Feature: AVX +func (x Uint8x16) ConvertToUint64x2() Uint64x2 + +// ConvertToUint64x2 converts 2 lowest vector element values to uint64. +// +// Asm: VPMOVZXWQ, CPU Feature: AVX +func (x Uint16x8) ConvertToUint64x2() Uint64x2 + +// ConvertToUint64x2 converts 2 lowest vector element values to uint64. +// +// Asm: VPMOVZXDQ, CPU Feature: AVX +func (x Uint32x4) ConvertToUint64x2() Uint64x2 + +/* ConvertToUint64x4 */ + +// ConvertToUint64x4 converts 4 lowest vector element values to uint64. +// +// Asm: VPMOVSXWQ, CPU Feature: AVX2 +func (x Int16x8) ConvertToUint64x4() Int64x4 + +// ConvertToUint64x4 converts 4 lowest vector element values to uint64. 
+// +// Asm: VPMOVZXBQ, CPU Feature: AVX2 +func (x Uint8x16) ConvertToUint64x4() Uint64x4 + +// ConvertToUint64x4 converts 4 lowest vector element values to uint64. +// +// Asm: VPMOVZXWQ, CPU Feature: AVX2 +func (x Uint16x8) ConvertToUint64x4() Uint64x4 + +/* ConvertToUint64x8 */ + +// ConvertToUint64x8 converts 8 lowest vector element values to uint64. +// +// Asm: VPMOVZXBQ, CPU Feature: AVX512 +func (x Uint8x16) ConvertToUint64x8() Uint64x8 + /* CopySign */ // CopySign returns the product of the first operand with -1, 0, or 1,