mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add VPLZCNT[DQ]
Change-Id: Ifd6d8c12deac9c41722fdf2511d860a334e83438 Reviewed-on: https://go-review.googlesource.com/c/go/+/701915 Reviewed-by: Cherry Mui <cherryyz@google.com> TryBot-Bypass: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
832c1f76dc
commit
c39b2fdd1e
11 changed files with 466 additions and 0 deletions
|
|
@ -110,6 +110,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMOVZXBQ256,
|
||||
ssa.OpAMD64VPMOVZXWQ256,
|
||||
ssa.OpAMD64VPMOVZXBQ512,
|
||||
ssa.OpAMD64VPLZCNTD128,
|
||||
ssa.OpAMD64VPLZCNTD256,
|
||||
ssa.OpAMD64VPLZCNTD512,
|
||||
ssa.OpAMD64VPLZCNTQ128,
|
||||
ssa.OpAMD64VPLZCNTQ256,
|
||||
ssa.OpAMD64VPLZCNTQ512,
|
||||
ssa.OpAMD64VPOPCNTB128,
|
||||
ssa.OpAMD64VPOPCNTB256,
|
||||
ssa.OpAMD64VPOPCNTB512,
|
||||
|
|
@ -863,6 +869,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPEXPANDQMasked128,
|
||||
ssa.OpAMD64VPEXPANDQMasked256,
|
||||
ssa.OpAMD64VPEXPANDQMasked512,
|
||||
ssa.OpAMD64VPLZCNTDMasked128,
|
||||
ssa.OpAMD64VPLZCNTDMasked256,
|
||||
ssa.OpAMD64VPLZCNTDMasked512,
|
||||
ssa.OpAMD64VPLZCNTQMasked128,
|
||||
ssa.OpAMD64VPLZCNTQMasked256,
|
||||
ssa.OpAMD64VPLZCNTQMasked512,
|
||||
ssa.OpAMD64VPOPCNTBMasked128,
|
||||
ssa.OpAMD64VPOPCNTBMasked256,
|
||||
ssa.OpAMD64VPOPCNTBMasked512,
|
||||
|
|
@ -1581,6 +1593,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VGF2P8MULBMasked128,
|
||||
ssa.OpAMD64VGF2P8MULBMasked256,
|
||||
ssa.OpAMD64VGF2P8MULBMasked512,
|
||||
ssa.OpAMD64VPLZCNTDMasked128,
|
||||
ssa.OpAMD64VPLZCNTDMasked256,
|
||||
ssa.OpAMD64VPLZCNTDMasked512,
|
||||
ssa.OpAMD64VPLZCNTQMasked128,
|
||||
ssa.OpAMD64VPLZCNTQMasked256,
|
||||
ssa.OpAMD64VPLZCNTQMasked512,
|
||||
ssa.OpAMD64VMAXPSMasked128,
|
||||
ssa.OpAMD64VMAXPSMasked256,
|
||||
ssa.OpAMD64VMAXPSMasked512,
|
||||
|
|
|
|||
|
|
@ -562,6 +562,18 @@
|
|||
(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
|
||||
(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
|
||||
(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
|
||||
(LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...)
|
||||
(LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...)
|
||||
(LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...)
|
||||
(LeadingZerosInt64x2 ...) => (VPLZCNTQ128 ...)
|
||||
(LeadingZerosInt64x4 ...) => (VPLZCNTQ256 ...)
|
||||
(LeadingZerosInt64x8 ...) => (VPLZCNTQ512 ...)
|
||||
(LeadingZerosUint32x4 ...) => (VPLZCNTD128 ...)
|
||||
(LeadingZerosUint32x8 ...) => (VPLZCNTD256 ...)
|
||||
(LeadingZerosUint32x16 ...) => (VPLZCNTD512 ...)
|
||||
(LeadingZerosUint64x2 ...) => (VPLZCNTQ128 ...)
|
||||
(LeadingZerosUint64x4 ...) => (VPLZCNTQ256 ...)
|
||||
(LeadingZerosUint64x8 ...) => (VPLZCNTQ512 ...)
|
||||
(LessFloat32x4 x y) => (VCMPPS128 [1] x y)
|
||||
(LessFloat32x8 x y) => (VCMPPS256 [1] x y)
|
||||
(LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
|
||||
|
|
@ -1334,6 +1346,8 @@
|
|||
(VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y mask)
|
||||
(VMOVDQU8Masked512 (VGF2P8AFFINEQB512 [a] x y) mask) => (VGF2P8AFFINEQBMasked512 [a] x y mask)
|
||||
(VMOVDQU8Masked512 (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512 x y mask)
|
||||
(VMOVDQU32Masked512 (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512 x mask)
|
||||
(VMOVDQU64Masked512 (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VMAXPS512 x y) mask) => (VMAXPSMasked512 x y mask)
|
||||
(VMOVDQU64Masked512 (VMAXPD512 x y) mask) => (VMAXPDMasked512 x y mask)
|
||||
(VMOVDQU8Masked512 (VPMAXSB512 x y) mask) => (VPMAXSBMasked512 x y mask)
|
||||
|
|
|
|||
|
|
@ -450,6 +450,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPHSUBSW256", argLength: 2, reg: v21, asm: "VPHSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPHSUBW128", argLength: 2, reg: v21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPHSUBW256", argLength: 2, reg: v21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPLZCNTD128", argLength: 1, reg: w11, asm: "VPLZCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPLZCNTD256", argLength: 1, reg: w11, asm: "VPLZCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPLZCNTD512", argLength: 1, reg: w11, asm: "VPLZCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPLZCNTDMasked128", argLength: 2, reg: wkw, asm: "VPLZCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPLZCNTDMasked256", argLength: 2, reg: wkw, asm: "VPLZCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPLZCNTDMasked512", argLength: 2, reg: wkw, asm: "VPLZCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPLZCNTQ128", argLength: 1, reg: w11, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPLZCNTQ256", argLength: 1, reg: w11, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPLZCNTQ512", argLength: 1, reg: w11, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPLZCNTQMasked128", argLength: 2, reg: wkw, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPLZCNTQMasked256", argLength: 2, reg: wkw, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPLZCNTQMasked512", argLength: 2, reg: wkw, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMADDUBSW128", argLength: 2, reg: v21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMADDUBSW256", argLength: 2, reg: v21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMADDUBSW512", argLength: 2, reg: w21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -526,6 +526,18 @@ func simdGenericOps() []opData {
|
|||
{name: "IsNanFloat64x2", argLength: 2, commutative: true},
|
||||
{name: "IsNanFloat64x4", argLength: 2, commutative: true},
|
||||
{name: "IsNanFloat64x8", argLength: 2, commutative: true},
|
||||
{name: "LeadingZerosInt32x4", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosInt32x8", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosInt32x16", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosInt64x2", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosInt64x4", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosInt64x8", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosUint32x4", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosUint32x8", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosUint32x16", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosUint64x2", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosUint64x4", argLength: 1, commutative: false},
|
||||
{name: "LeadingZerosUint64x8", argLength: 1, commutative: false},
|
||||
{name: "LessEqualFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "LessEqualFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "LessEqualFloat32x16", argLength: 2, commutative: false},
|
||||
|
|
|
|||
|
|
@ -1682,6 +1682,18 @@ const (
|
|||
OpAMD64VPHSUBSW256
|
||||
OpAMD64VPHSUBW128
|
||||
OpAMD64VPHSUBW256
|
||||
OpAMD64VPLZCNTD128
|
||||
OpAMD64VPLZCNTD256
|
||||
OpAMD64VPLZCNTD512
|
||||
OpAMD64VPLZCNTDMasked128
|
||||
OpAMD64VPLZCNTDMasked256
|
||||
OpAMD64VPLZCNTDMasked512
|
||||
OpAMD64VPLZCNTQ128
|
||||
OpAMD64VPLZCNTQ256
|
||||
OpAMD64VPLZCNTQ512
|
||||
OpAMD64VPLZCNTQMasked128
|
||||
OpAMD64VPLZCNTQMasked256
|
||||
OpAMD64VPLZCNTQMasked512
|
||||
OpAMD64VPMADDUBSW128
|
||||
OpAMD64VPMADDUBSW256
|
||||
OpAMD64VPMADDUBSW512
|
||||
|
|
@ -5343,6 +5355,18 @@ const (
|
|||
OpIsNanFloat64x2
|
||||
OpIsNanFloat64x4
|
||||
OpIsNanFloat64x8
|
||||
OpLeadingZerosInt32x4
|
||||
OpLeadingZerosInt32x8
|
||||
OpLeadingZerosInt32x16
|
||||
OpLeadingZerosInt64x2
|
||||
OpLeadingZerosInt64x4
|
||||
OpLeadingZerosInt64x8
|
||||
OpLeadingZerosUint32x4
|
||||
OpLeadingZerosUint32x8
|
||||
OpLeadingZerosUint32x16
|
||||
OpLeadingZerosUint64x2
|
||||
OpLeadingZerosUint64x4
|
||||
OpLeadingZerosUint64x8
|
||||
OpLessEqualFloat32x4
|
||||
OpLessEqualFloat32x8
|
||||
OpLessEqualFloat32x16
|
||||
|
|
@ -25897,6 +25921,168 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTD128",
|
||||
argLen: 1,
|
||||
asm: x86.AVPLZCNTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTD256",
|
||||
argLen: 1,
|
||||
asm: x86.AVPLZCNTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTD512",
|
||||
argLen: 1,
|
||||
asm: x86.AVPLZCNTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTDMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPLZCNTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTDMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPLZCNTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTDMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPLZCNTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTQ128",
|
||||
argLen: 1,
|
||||
asm: x86.AVPLZCNTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTQ256",
|
||||
argLen: 1,
|
||||
asm: x86.AVPLZCNTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTQ512",
|
||||
argLen: 1,
|
||||
asm: x86.AVPLZCNTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTQMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPLZCNTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTQMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPLZCNTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPLZCNTQMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPLZCNTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPMADDUBSW128",
|
||||
argLen: 2,
|
||||
|
|
@ -68572,6 +68758,66 @@ var opcodeTable = [...]opInfo{
|
|||
commutative: true,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosInt32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosInt32x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosInt32x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosInt64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosInt64x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosInt64x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosUint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosUint32x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosUint32x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosUint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosUint64x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LeadingZerosUint64x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "LessEqualFloat32x4",
|
||||
argLen: 2,
|
||||
|
|
|
|||
|
|
@ -2489,6 +2489,42 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpIsNonNil(v)
|
||||
case OpIsSliceInBounds:
|
||||
return rewriteValueAMD64_OpIsSliceInBounds(v)
|
||||
case OpLeadingZerosInt32x16:
|
||||
v.Op = OpAMD64VPLZCNTD512
|
||||
return true
|
||||
case OpLeadingZerosInt32x4:
|
||||
v.Op = OpAMD64VPLZCNTD128
|
||||
return true
|
||||
case OpLeadingZerosInt32x8:
|
||||
v.Op = OpAMD64VPLZCNTD256
|
||||
return true
|
||||
case OpLeadingZerosInt64x2:
|
||||
v.Op = OpAMD64VPLZCNTQ128
|
||||
return true
|
||||
case OpLeadingZerosInt64x4:
|
||||
v.Op = OpAMD64VPLZCNTQ256
|
||||
return true
|
||||
case OpLeadingZerosInt64x8:
|
||||
v.Op = OpAMD64VPLZCNTQ512
|
||||
return true
|
||||
case OpLeadingZerosUint32x16:
|
||||
v.Op = OpAMD64VPLZCNTD512
|
||||
return true
|
||||
case OpLeadingZerosUint32x4:
|
||||
v.Op = OpAMD64VPLZCNTD128
|
||||
return true
|
||||
case OpLeadingZerosUint32x8:
|
||||
v.Op = OpAMD64VPLZCNTD256
|
||||
return true
|
||||
case OpLeadingZerosUint64x2:
|
||||
v.Op = OpAMD64VPLZCNTQ128
|
||||
return true
|
||||
case OpLeadingZerosUint64x4:
|
||||
v.Op = OpAMD64VPLZCNTQ256
|
||||
return true
|
||||
case OpLeadingZerosUint64x8:
|
||||
v.Op = OpAMD64VPLZCNTQ512
|
||||
return true
|
||||
case OpLeq16:
|
||||
return rewriteValueAMD64_OpLeq16(v)
|
||||
case OpLeq16U:
|
||||
|
|
@ -27364,6 +27400,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
|||
v.AddArg3(x, y, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask)
|
||||
// result: (VPLZCNTDMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPLZCNTD512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPLZCNTDMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VMAXPS512 x y) mask)
|
||||
// result: (VMAXPSMasked512 x y mask)
|
||||
for {
|
||||
|
|
@ -28057,6 +28105,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool {
|
|||
v.AddArg3(x, y, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked512 (VPLZCNTQ512 x) mask)
|
||||
// result: (VPLZCNTQMasked512 x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPLZCNTQ512 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPLZCNTQMasked512)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked512 (VMAXPD512 x y) mask)
|
||||
// result: (VMAXPDMasked512 x y mask)
|
||||
for {
|
||||
|
|
|
|||
|
|
@ -574,6 +574,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.LeadingZeros", opLen1(ssa.OpLeadingZerosInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.LeadingZeros", opLen1(ssa.OpLeadingZerosUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.LeadingZeros", opLen1(ssa.OpLeadingZerosUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.LeadingZeros", opLen1(ssa.OpLeadingZerosUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
|
|||
5
src/simd/_gen/simdgen/ops/Others/categories.yaml
Normal file
5
src/simd/_gen/simdgen/ops/Others/categories.yaml
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Declares the LeadingZeros operation category: the operation is not
# commutative, and the documentation string below is the template for the
# generated doc comments, with NAME standing in for the method name.
!sum
|
||||
- go: LeadingZeros
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME counts the leading zeros of each element in x.
|
||||
8
src/simd/_gen/simdgen/ops/Others/go.yaml
Normal file
8
src/simd/_gen/simdgen/ops/Others/go.yaml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# Maps the Go-level LeadingZeros operation onto the VPLZCNTD and VPLZCNTQ
# instructions; the "[DQ]" in asm is a pattern covering both suffixes.
# The single output entry is an alias (*any) of the input entry anchored as
# &any, so the input and output share one definition.
# NOTE(review): $t looks like a type placeholder filled in by the generator - confirm.
!sum
|
||||
- go: LeadingZeros
|
||||
asm: "VPLZCNT[DQ]"
|
||||
in:
|
||||
- &any
|
||||
go: $t
|
||||
out:
|
||||
- *any
|
||||
|
|
@ -540,3 +540,20 @@ func TestClearAVXUpperBits(t *testing.T) {
|
|||
checkSlices[int64](t, r, []int64{11, 22, 33, 44})
|
||||
checkSlices[int64](t, s, []int64{9, 18, 27, 36})
|
||||
}
|
||||
|
||||
func TestLeadingZeros(t *testing.T) {
|
||||
if !simd.HasAVX512() {
|
||||
t.Skip("Test requires HasAVX512, not available on this hardware")
|
||||
return
|
||||
}
|
||||
|
||||
src := []uint64{0b1111, 0}
|
||||
want := []uint64{60, 64}
|
||||
got := make([]uint64, 2)
|
||||
simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
|
||||
for i := range 2 {
|
||||
if want[i] != got[i] {
|
||||
t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3298,6 +3298,68 @@ func (x Float64x4) IsNan(y Float64x4) Mask64x4
|
|||
// Asm: VCMPPD, CPU Feature: AVX512
|
||||
func (x Float64x8) IsNan(y Float64x8) Mask64x8
|
||||
|
||||
/* LeadingZeros */
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTD, CPU Feature: AVX512
|
||||
func (x Int32x4) LeadingZeros() Int32x4
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTD, CPU Feature: AVX512
|
||||
func (x Int32x8) LeadingZeros() Int32x8
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTD, CPU Feature: AVX512
|
||||
func (x Int32x16) LeadingZeros() Int32x16
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTQ, CPU Feature: AVX512
|
||||
func (x Int64x2) LeadingZeros() Int64x2
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTQ, CPU Feature: AVX512
|
||||
func (x Int64x4) LeadingZeros() Int64x4
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTQ, CPU Feature: AVX512
|
||||
func (x Int64x8) LeadingZeros() Int64x8
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) LeadingZeros() Uint32x4
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTD, CPU Feature: AVX512
|
||||
func (x Uint32x8) LeadingZeros() Uint32x8
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTD, CPU Feature: AVX512
|
||||
func (x Uint32x16) LeadingZeros() Uint32x16
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) LeadingZeros() Uint64x2
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTQ, CPU Feature: AVX512
|
||||
func (x Uint64x4) LeadingZeros() Uint64x4
|
||||
|
||||
// LeadingZeros counts the leading zeros of each element in x.
|
||||
//
|
||||
// Asm: VPLZCNTQ, CPU Feature: AVX512
|
||||
func (x Uint64x8) LeadingZeros() Uint64x8
|
||||
|
||||
/* Less */
|
||||
|
||||
// Less compares for less than.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue