mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd, cmd/compile: add widening unsigned converts 8->16->32
Change-Id: If0bde7154bd622573375eba5539fd642b8ef9d2f Reviewed-on: https://go-review.googlesource.com/c/go/+/696555 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
0f660d675f
commit
4fce49b86c
10 changed files with 404 additions and 5 deletions
|
|
@ -44,9 +44,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VCVTTPS2DQ128,
|
ssa.OpAMD64VCVTTPS2DQ128,
|
||||||
ssa.OpAMD64VCVTTPS2DQ256,
|
ssa.OpAMD64VCVTTPS2DQ256,
|
||||||
ssa.OpAMD64VCVTTPS2DQ512,
|
ssa.OpAMD64VCVTTPS2DQ512,
|
||||||
|
ssa.OpAMD64VPMOVZXBW256,
|
||||||
|
ssa.OpAMD64VPMOVZXBW512,
|
||||||
|
ssa.OpAMD64VPMOVZXBW128,
|
||||||
ssa.OpAMD64VCVTPS2UDQ128,
|
ssa.OpAMD64VCVTPS2UDQ128,
|
||||||
ssa.OpAMD64VCVTPS2UDQ256,
|
ssa.OpAMD64VCVTPS2UDQ256,
|
||||||
ssa.OpAMD64VCVTPS2UDQ512,
|
ssa.OpAMD64VCVTPS2UDQ512,
|
||||||
|
ssa.OpAMD64VPMOVZXWD256,
|
||||||
|
ssa.OpAMD64VPMOVZXWD512,
|
||||||
|
ssa.OpAMD64VPMOVZXWD128,
|
||||||
ssa.OpAMD64VPOPCNTB128,
|
ssa.OpAMD64VPOPCNTB128,
|
||||||
ssa.OpAMD64VPOPCNTB256,
|
ssa.OpAMD64VPOPCNTB256,
|
||||||
ssa.OpAMD64VPOPCNTB512,
|
ssa.OpAMD64VPOPCNTB512,
|
||||||
|
|
@ -679,9 +685,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked128,
|
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked256,
|
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked512,
|
ssa.OpAMD64VCVTTPS2DQMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXBWMasked256,
|
||||||
|
ssa.OpAMD64VPMOVZXBWMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXBWMasked128,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked128,
|
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked256,
|
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked512,
|
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXWDMasked256,
|
||||||
|
ssa.OpAMD64VPMOVZXWDMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXWDMasked128,
|
||||||
ssa.OpAMD64VEXPANDPSMasked128,
|
ssa.OpAMD64VEXPANDPSMasked128,
|
||||||
ssa.OpAMD64VEXPANDPSMasked256,
|
ssa.OpAMD64VEXPANDPSMasked256,
|
||||||
ssa.OpAMD64VEXPANDPSMasked512,
|
ssa.OpAMD64VEXPANDPSMasked512,
|
||||||
|
|
@ -1289,9 +1301,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked128,
|
ssa.OpAMD64VCVTTPS2DQMasked128,
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked256,
|
ssa.OpAMD64VCVTTPS2DQMasked256,
|
||||||
ssa.OpAMD64VCVTTPS2DQMasked512,
|
ssa.OpAMD64VCVTTPS2DQMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXBWMasked256,
|
||||||
|
ssa.OpAMD64VPMOVZXBWMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXBWMasked128,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked128,
|
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked256,
|
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked512,
|
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXWDMasked256,
|
||||||
|
ssa.OpAMD64VPMOVZXWDMasked512,
|
||||||
|
ssa.OpAMD64VPMOVZXWDMasked128,
|
||||||
ssa.OpAMD64VDIVPSMasked128,
|
ssa.OpAMD64VDIVPSMasked128,
|
||||||
ssa.OpAMD64VDIVPSMasked256,
|
ssa.OpAMD64VDIVPSMasked256,
|
||||||
ssa.OpAMD64VDIVPSMasked512,
|
ssa.OpAMD64VDIVPSMasked512,
|
||||||
|
|
|
||||||
|
|
@ -214,9 +214,15 @@
|
||||||
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
|
(ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
|
||||||
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
|
(ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
|
||||||
(ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
|
(ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
|
||||||
|
(ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...)
|
||||||
|
(ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...)
|
||||||
|
(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
|
||||||
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
|
(ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
|
||||||
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
|
(ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
|
||||||
(ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
|
(ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
|
||||||
|
(ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...)
|
||||||
|
(ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...)
|
||||||
|
(ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
|
||||||
(CopySignInt8x16 ...) => (VPSIGNB128 ...)
|
(CopySignInt8x16 ...) => (VPSIGNB128 ...)
|
||||||
(CopySignInt8x32 ...) => (VPSIGNB256 ...)
|
(CopySignInt8x32 ...) => (VPSIGNB256 ...)
|
||||||
(CopySignInt16x8 ...) => (VPSIGNW128 ...)
|
(CopySignInt16x8 ...) => (VPSIGNW128 ...)
|
||||||
|
|
|
||||||
|
|
@ -542,6 +542,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXBW128", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXBW256", argLength: 1, reg: v11, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXBW512", argLength: 1, reg: w11, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXBWMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXBWMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXBWMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXWD128", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXWD256", argLength: 1, reg: v11, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXWD512", argLength: 1, reg: w11, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXWDMasked128", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXWDMasked256", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPMOVZXWDMasked512", argLength: 2, reg: wkw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
|
|
|
||||||
|
|
@ -206,9 +206,15 @@ func simdGenericOps() []opData {
|
||||||
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
|
{name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
|
{name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
|
{name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
|
{name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
|
{name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
|
{name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false},
|
||||||
|
{name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false},
|
||||||
{name: "CopySignInt8x16", argLength: 2, commutative: false},
|
{name: "CopySignInt8x16", argLength: 2, commutative: false},
|
||||||
{name: "CopySignInt8x32", argLength: 2, commutative: false},
|
{name: "CopySignInt8x32", argLength: 2, commutative: false},
|
||||||
{name: "CopySignInt16x8", argLength: 2, commutative: false},
|
{name: "CopySignInt16x8", argLength: 2, commutative: false},
|
||||||
|
|
|
||||||
|
|
@ -1765,6 +1765,18 @@ const (
|
||||||
OpAMD64VPMINUWMasked128
|
OpAMD64VPMINUWMasked128
|
||||||
OpAMD64VPMINUWMasked256
|
OpAMD64VPMINUWMasked256
|
||||||
OpAMD64VPMINUWMasked512
|
OpAMD64VPMINUWMasked512
|
||||||
|
OpAMD64VPMOVZXBW128
|
||||||
|
OpAMD64VPMOVZXBW256
|
||||||
|
OpAMD64VPMOVZXBW512
|
||||||
|
OpAMD64VPMOVZXBWMasked128
|
||||||
|
OpAMD64VPMOVZXBWMasked256
|
||||||
|
OpAMD64VPMOVZXBWMasked512
|
||||||
|
OpAMD64VPMOVZXWD128
|
||||||
|
OpAMD64VPMOVZXWD256
|
||||||
|
OpAMD64VPMOVZXWD512
|
||||||
|
OpAMD64VPMOVZXWDMasked128
|
||||||
|
OpAMD64VPMOVZXWDMasked256
|
||||||
|
OpAMD64VPMOVZXWDMasked512
|
||||||
OpAMD64VPMULDQ128
|
OpAMD64VPMULDQ128
|
||||||
OpAMD64VPMULDQ256
|
OpAMD64VPMULDQ256
|
||||||
OpAMD64VPMULHUW128
|
OpAMD64VPMULHUW128
|
||||||
|
|
@ -4838,9 +4850,15 @@ const (
|
||||||
OpConvertToInt32Float32x4
|
OpConvertToInt32Float32x4
|
||||||
OpConvertToInt32Float32x8
|
OpConvertToInt32Float32x8
|
||||||
OpConvertToInt32Float32x16
|
OpConvertToInt32Float32x16
|
||||||
|
OpConvertToUint16Uint8x16
|
||||||
|
OpConvertToUint16Uint8x32
|
||||||
|
OpConvertToUint16x8Uint8x16
|
||||||
OpConvertToUint32Float32x4
|
OpConvertToUint32Float32x4
|
||||||
OpConvertToUint32Float32x8
|
OpConvertToUint32Float32x8
|
||||||
OpConvertToUint32Float32x16
|
OpConvertToUint32Float32x16
|
||||||
|
OpConvertToUint32Uint16x8
|
||||||
|
OpConvertToUint32Uint16x16
|
||||||
|
OpConvertToUint32x4Uint16x8
|
||||||
OpCopySignInt8x16
|
OpCopySignInt8x16
|
||||||
OpCopySignInt8x32
|
OpCopySignInt8x32
|
||||||
OpCopySignInt16x8
|
OpCopySignInt16x8
|
||||||
|
|
@ -26824,6 +26842,168 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXBW128",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPMOVZXBW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXBW256",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPMOVZXBW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXBW512",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPMOVZXBW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXBWMasked128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMOVZXBW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXBWMasked256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMOVZXBW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXBWMasked512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMOVZXBW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXWD128",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPMOVZXWD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXWD256",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPMOVZXWD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXWD512",
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPMOVZXWD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXWDMasked128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMOVZXWD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXWDMasked256",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMOVZXWD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPMOVZXWDMasked512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMOVZXWD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPMULDQ128",
|
name: "VPMULDQ128",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -64008,6 +64188,21 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint16Uint8x16",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint16Uint8x32",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint16x8Uint8x16",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "ConvertToUint32Float32x4",
|
name: "ConvertToUint32Float32x4",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -64023,6 +64218,21 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32Uint16x8",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32Uint16x16",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConvertToUint32x4Uint16x8",
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "CopySignInt8x16",
|
name: "CopySignInt8x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
|
||||||
|
|
@ -1333,6 +1333,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpConvertToInt32Float32x8:
|
case OpConvertToInt32Float32x8:
|
||||||
v.Op = OpAMD64VCVTTPS2DQ256
|
v.Op = OpAMD64VCVTTPS2DQ256
|
||||||
return true
|
return true
|
||||||
|
case OpConvertToUint16Uint8x16:
|
||||||
|
v.Op = OpAMD64VPMOVZXBW256
|
||||||
|
return true
|
||||||
|
case OpConvertToUint16Uint8x32:
|
||||||
|
v.Op = OpAMD64VPMOVZXBW512
|
||||||
|
return true
|
||||||
|
case OpConvertToUint16x8Uint8x16:
|
||||||
|
v.Op = OpAMD64VPMOVZXBW128
|
||||||
|
return true
|
||||||
case OpConvertToUint32Float32x16:
|
case OpConvertToUint32Float32x16:
|
||||||
v.Op = OpAMD64VCVTPS2UDQ512
|
v.Op = OpAMD64VCVTPS2UDQ512
|
||||||
return true
|
return true
|
||||||
|
|
@ -1342,6 +1351,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpConvertToUint32Float32x8:
|
case OpConvertToUint32Float32x8:
|
||||||
v.Op = OpAMD64VCVTPS2UDQ256
|
v.Op = OpAMD64VCVTPS2UDQ256
|
||||||
return true
|
return true
|
||||||
|
case OpConvertToUint32Uint16x16:
|
||||||
|
v.Op = OpAMD64VPMOVZXWD512
|
||||||
|
return true
|
||||||
|
case OpConvertToUint32Uint16x8:
|
||||||
|
v.Op = OpAMD64VPMOVZXWD256
|
||||||
|
return true
|
||||||
|
case OpConvertToUint32x4Uint16x8:
|
||||||
|
v.Op = OpAMD64VPMOVZXWD128
|
||||||
|
return true
|
||||||
case OpCopySignInt16x16:
|
case OpCopySignInt16x16:
|
||||||
v.Op = OpAMD64VPSIGNW256
|
v.Op = OpAMD64VPSIGNW256
|
||||||
return true
|
return true
|
||||||
|
|
|
||||||
|
|
@ -226,9 +226,15 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,24 @@
|
||||||
- go: ConvertToInt32
|
- go: ConvertToInt32
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// ConvertToInt32 converts element values to int32.
|
// NAME converts element values to int32.
|
||||||
|
|
||||||
- go: ConvertToUint32
|
- go: ConvertToUint32
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// ConvertToUint32Masked converts element values to uint32.
|
// NAME converts element values to uint32.
|
||||||
|
|
||||||
|
- go: ConvertToUint16
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME converts element values to uint16.
|
||||||
|
|
||||||
|
- go: ConvertToUint16x8
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME converts 8 lowest vector element values to uint16.
|
||||||
|
|
||||||
|
- go: ConvertToUint32x4
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME converts 4 lowest vector element values to uint32.
|
||||||
|
|
|
||||||
|
|
@ -19,3 +19,75 @@
|
||||||
go: $u
|
go: $u
|
||||||
base: uint
|
base: uint
|
||||||
elemBits: 32
|
elemBits: 32
|
||||||
|
|
||||||
|
- go: ConvertToUint16x8
|
||||||
|
asm: "VPMOVZXBW"
|
||||||
|
in:
|
||||||
|
- &u8x16
|
||||||
|
base: uint
|
||||||
|
elemBits: 8
|
||||||
|
bits: 128
|
||||||
|
out:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 16
|
||||||
|
bits: 128
|
||||||
|
|
||||||
|
- go: ConvertToUint16
|
||||||
|
asm: "VPMOVZXBW"
|
||||||
|
in:
|
||||||
|
- *u8x16
|
||||||
|
out:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 16
|
||||||
|
bits: 256
|
||||||
|
|
||||||
|
- go: ConvertToUint16
|
||||||
|
asm: "VPMOVZXBW"
|
||||||
|
in:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 8
|
||||||
|
bits: 256
|
||||||
|
out:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 16
|
||||||
|
bits: 512
|
||||||
|
|
||||||
|
- go: ConvertToUint32x4
|
||||||
|
asm: "VPMOVZXWD"
|
||||||
|
in:
|
||||||
|
- &u16x8
|
||||||
|
base: uint
|
||||||
|
elemBits: 16
|
||||||
|
bits: 128
|
||||||
|
out:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 32
|
||||||
|
bits: 128
|
||||||
|
|
||||||
|
- go: ConvertToUint32
|
||||||
|
asm: "VPMOVZXWD"
|
||||||
|
in:
|
||||||
|
- *u16x8
|
||||||
|
out:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 32
|
||||||
|
bits: 256
|
||||||
|
|
||||||
|
- go: ConvertToUint32
|
||||||
|
asm: "VPMOVZXWD"
|
||||||
|
in:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 16
|
||||||
|
bits: 256
|
||||||
|
out:
|
||||||
|
-
|
||||||
|
base: uint
|
||||||
|
elemBits: 32
|
||||||
|
bits: 512
|
||||||
|
|
|
||||||
|
|
@ -1212,23 +1212,59 @@ func (x Float32x8) ConvertToInt32() Int32x8
|
||||||
// Asm: VCVTTPS2DQ, CPU Feature: AVX512
|
// Asm: VCVTTPS2DQ, CPU Feature: AVX512
|
||||||
func (x Float32x16) ConvertToInt32() Int32x16
|
func (x Float32x16) ConvertToInt32() Int32x16
|
||||||
|
|
||||||
|
/* ConvertToUint16 */
|
||||||
|
|
||||||
|
// ConvertToUint16 widens every uint8 element of x to uint16 by zero extension, yielding a Uint16x16.
|
||||||
|
//
|
||||||
|
// Asm: VPMOVZXBW, CPU Feature: AVX2
|
||||||
|
func (x Uint8x16) ConvertToUint16() Uint16x16
|
||||||
|
|
||||||
|
// ConvertToUint16 widens every uint8 element of x to uint16 by zero extension, yielding a Uint16x32.
|
||||||
|
//
|
||||||
|
// Asm: VPMOVZXBW, CPU Feature: AVX512
|
||||||
|
func (x Uint8x32) ConvertToUint16() Uint16x32
|
||||||
|
|
||||||
|
/* ConvertToUint16x8 */
|
||||||
|
|
||||||
|
// ConvertToUint16x8 widens the 8 lowest uint8 elements of x to uint16 by zero extension; the remaining elements of x do not contribute to the result.
|
||||||
|
//
|
||||||
|
// Asm: VPMOVZXBW, CPU Feature: AVX
|
||||||
|
func (x Uint8x16) ConvertToUint16x8() Uint16x8
|
||||||
|
|
||||||
/* ConvertToUint32 */
|
/* ConvertToUint32 */
|
||||||
|
|
||||||
// ConvertToUint32Masked converts element values to uint32.
|
// ConvertToUint32 converts element values to uint32.
|
||||||
//
|
//
|
||||||
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
||||||
func (x Float32x4) ConvertToUint32() Uint32x4
|
func (x Float32x4) ConvertToUint32() Uint32x4
|
||||||
|
|
||||||
// ConvertToUint32Masked converts element values to uint32.
|
// ConvertToUint32 converts element values to uint32.
|
||||||
//
|
//
|
||||||
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
||||||
func (x Float32x8) ConvertToUint32() Uint32x8
|
func (x Float32x8) ConvertToUint32() Uint32x8
|
||||||
|
|
||||||
// ConvertToUint32Masked converts element values to uint32.
|
// ConvertToUint32 converts element values to uint32.
|
||||||
//
|
//
|
||||||
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
// Asm: VCVTPS2UDQ, CPU Feature: AVX512
|
||||||
func (x Float32x16) ConvertToUint32() Uint32x16
|
func (x Float32x16) ConvertToUint32() Uint32x16
|
||||||
|
|
||||||
|
// ConvertToUint32 widens every uint16 element of x to uint32 by zero extension, yielding a Uint32x8.
|
||||||
|
//
|
||||||
|
// Asm: VPMOVZXWD, CPU Feature: AVX2
|
||||||
|
func (x Uint16x8) ConvertToUint32() Uint32x8
|
||||||
|
|
||||||
|
// ConvertToUint32 widens every uint16 element of x to uint32 by zero extension, yielding a Uint32x16.
|
||||||
|
//
|
||||||
|
// Asm: VPMOVZXWD, CPU Feature: AVX512
|
||||||
|
func (x Uint16x16) ConvertToUint32() Uint32x16
|
||||||
|
|
||||||
|
/* ConvertToUint32x4 */
|
||||||
|
|
||||||
|
// ConvertToUint32x4 widens the 4 lowest uint16 elements of x to uint32 by zero extension; the remaining elements of x do not contribute to the result.
|
||||||
|
//
|
||||||
|
// Asm: VPMOVZXWD, CPU Feature: AVX
|
||||||
|
func (x Uint16x8) ConvertToUint32x4() Uint32x4
|
||||||
|
|
||||||
/* CopySign */
|
/* CopySign */
|
||||||
|
|
||||||
// CopySign returns the product of the first operand with -1, 0, or 1,
|
// CopySign returns the product of the first operand with -1, 0, or 1,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue