mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: complete AVX2? u?int shuffles
The naming follows this convention: - If its indices come from a constant, append "Constant" to the name. - If its indices are used by multiple groups, append "Grouped" to the name. - If it indexes only the low part, append "Lo"; similarly "Hi" for the high part. Change-Id: I6a58f5dae54c882ebd59f39b5288f6f3f14d957f Reviewed-on: https://go-review.googlesource.com/c/go/+/698296 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
fa1e78c9ad
commit
baea0c700b
10 changed files with 1050 additions and 2 deletions
|
|
@ -346,6 +346,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPERMQ256,
|
||||
ssa.OpAMD64VPERMPD512,
|
||||
ssa.OpAMD64VPERMQ512,
|
||||
ssa.OpAMD64VPSHUFB256,
|
||||
ssa.OpAMD64VPSHUFB512,
|
||||
ssa.OpAMD64VPROLVD128,
|
||||
ssa.OpAMD64VPROLVD256,
|
||||
ssa.OpAMD64VPROLVD512,
|
||||
|
|
@ -606,6 +608,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPORQMasked128,
|
||||
ssa.OpAMD64VPORQMasked256,
|
||||
ssa.OpAMD64VPORQMasked512,
|
||||
ssa.OpAMD64VPSHUFBMasked256,
|
||||
ssa.OpAMD64VPSHUFBMasked512,
|
||||
ssa.OpAMD64VPSHUFBMasked128,
|
||||
ssa.OpAMD64VPERMBMasked256,
|
||||
ssa.OpAMD64VPERMBMasked512,
|
||||
|
|
@ -903,6 +907,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VEXTRACTF64X4256,
|
||||
ssa.OpAMD64VEXTRACTI128128,
|
||||
ssa.OpAMD64VEXTRACTI64X4256,
|
||||
ssa.OpAMD64VPSHUFD128,
|
||||
ssa.OpAMD64VPSHUFD256,
|
||||
ssa.OpAMD64VPSHUFD512,
|
||||
ssa.OpAMD64VPSHUFHW128,
|
||||
ssa.OpAMD64VPSHUFHW256,
|
||||
ssa.OpAMD64VPSHUFHW512,
|
||||
ssa.OpAMD64VPROLD128,
|
||||
ssa.OpAMD64VPROLD256,
|
||||
ssa.OpAMD64VPROLD512,
|
||||
|
|
@ -956,6 +966,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VREDUCEPDMasked128,
|
||||
ssa.OpAMD64VREDUCEPDMasked256,
|
||||
ssa.OpAMD64VREDUCEPDMasked512,
|
||||
ssa.OpAMD64VPSHUFDMasked256,
|
||||
ssa.OpAMD64VPSHUFDMasked512,
|
||||
ssa.OpAMD64VPSHUFHWMasked256,
|
||||
ssa.OpAMD64VPSHUFHWMasked512,
|
||||
ssa.OpAMD64VPSHUFHWMasked128,
|
||||
ssa.OpAMD64VPSHUFDMasked128,
|
||||
ssa.OpAMD64VPROLDMasked128,
|
||||
ssa.OpAMD64VPROLDMasked256,
|
||||
ssa.OpAMD64VPROLDMasked512,
|
||||
|
|
@ -1682,6 +1698,14 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPERMI2QMasked256,
|
||||
ssa.OpAMD64VPERMI2PDMasked512,
|
||||
ssa.OpAMD64VPERMI2QMasked512,
|
||||
ssa.OpAMD64VPSHUFDMasked256,
|
||||
ssa.OpAMD64VPSHUFDMasked512,
|
||||
ssa.OpAMD64VPSHUFHWMasked256,
|
||||
ssa.OpAMD64VPSHUFHWMasked512,
|
||||
ssa.OpAMD64VPSHUFHWMasked128,
|
||||
ssa.OpAMD64VPSHUFDMasked128,
|
||||
ssa.OpAMD64VPSHUFBMasked256,
|
||||
ssa.OpAMD64VPSHUFBMasked512,
|
||||
ssa.OpAMD64VPSHUFBMasked128,
|
||||
ssa.OpAMD64VPERMBMasked256,
|
||||
ssa.OpAMD64VPERMBMasked512,
|
||||
|
|
|
|||
|
|
@ -782,6 +782,32 @@
|
|||
(Permute2Uint64x2 ...) => (VPERMI2Q128 ...)
|
||||
(Permute2Uint64x4 ...) => (VPERMI2Q256 ...)
|
||||
(Permute2Uint64x8 ...) => (VPERMI2Q512 ...)
|
||||
(PermuteConstantInt32x4 ...) => (VPSHUFD128 ...)
|
||||
(PermuteConstantUint32x4 ...) => (VPSHUFD128 ...)
|
||||
(PermuteConstantGroupedInt32x8 ...) => (VPSHUFD256 ...)
|
||||
(PermuteConstantGroupedInt32x16 ...) => (VPSHUFD512 ...)
|
||||
(PermuteConstantGroupedUint32x8 ...) => (VPSHUFD256 ...)
|
||||
(PermuteConstantGroupedUint32x16 ...) => (VPSHUFD512 ...)
|
||||
(PermuteConstantHiInt16x8 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantHiInt32x4 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantHiUint16x8 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantHiUint32x4 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantHiGroupedInt16x16 ...) => (VPSHUFHW256 ...)
|
||||
(PermuteConstantHiGroupedInt16x32 ...) => (VPSHUFHW512 ...)
|
||||
(PermuteConstantHiGroupedUint16x16 ...) => (VPSHUFHW256 ...)
|
||||
(PermuteConstantHiGroupedUint16x32 ...) => (VPSHUFHW512 ...)
|
||||
(PermuteConstantLoInt16x8 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantLoInt32x4 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantLoUint16x8 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantLoUint32x4 ...) => (VPSHUFHW128 ...)
|
||||
(PermuteConstantLoGroupedInt16x16 ...) => (VPSHUFHW256 ...)
|
||||
(PermuteConstantLoGroupedInt16x32 ...) => (VPSHUFHW512 ...)
|
||||
(PermuteConstantLoGroupedUint16x16 ...) => (VPSHUFHW256 ...)
|
||||
(PermuteConstantLoGroupedUint16x32 ...) => (VPSHUFHW512 ...)
|
||||
(PermuteGroupedInt8x32 ...) => (VPSHUFB256 ...)
|
||||
(PermuteGroupedInt8x64 ...) => (VPSHUFB512 ...)
|
||||
(PermuteGroupedUint8x32 ...) => (VPSHUFB256 ...)
|
||||
(PermuteGroupedUint8x64 ...) => (VPSHUFB512 ...)
|
||||
(ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
|
||||
(ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
|
||||
(ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
|
||||
|
|
@ -1317,6 +1343,9 @@
|
|||
(VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) => (VPERMI2DMasked512 x y z mask)
|
||||
(VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) => (VPERMI2PDMasked512 x y z mask)
|
||||
(VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) => (VPERMI2QMasked512 x y z mask)
|
||||
(VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512 [a] x mask)
|
||||
(VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512 [a] x mask)
|
||||
(VMOVDQU8Masked512 (VPSHUFB512 x y) mask) => (VPSHUFBMasked512 x y mask)
|
||||
(VMOVDQU8Masked512 (VPERMB512 x y) mask) => (VPERMBMasked512 x y mask)
|
||||
(VMOVDQU16Masked512 (VPERMW512 x y) mask) => (VPERMWMasked512 x y mask)
|
||||
(VMOVDQU32Masked512 (VPERMPS512 x y) mask) => (VPERMPSMasked512 x y mask)
|
||||
|
|
|
|||
|
|
@ -816,7 +816,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPSHRDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHRDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHUFB128", argLength: 2, reg: v21, asm: "VPSHUFB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSHUFB256", argLength: 2, reg: v21, asm: "VPSHUFB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSHUFB512", argLength: 2, reg: w21, asm: "VPSHUFB", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPSHUFBMasked128", argLength: 3, reg: w2kw, asm: "VPSHUFB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSHUFBMasked256", argLength: 3, reg: w2kw, asm: "VPSHUFB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSHUFBMasked512", argLength: 3, reg: w2kw, asm: "VPSHUFB", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPSIGNB128", argLength: 2, reg: v21, asm: "VPSIGNB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSIGNB256", argLength: 2, reg: v21, asm: "VPSIGNB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSIGND128", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
|
|
@ -1141,6 +1145,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
|
||||
{name: "VPSHUFD128", argLength: 1, reg: v11, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSHUFD256", argLength: 1, reg: v11, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSHUFD512", argLength: 1, reg: w11, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPSHUFDMasked256", argLength: 2, reg: wkw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSHUFDMasked512", argLength: 2, reg: wkw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPSHUFHW128", argLength: 1, reg: w11, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSHUFHW256", argLength: 1, reg: v11, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSHUFHW512", argLength: 1, reg: w11, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPSHUFHWMasked256", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSHUFHWMasked512", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPSHUFHWMasked128", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSHUFDMasked128", argLength: 2, reg: wkw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPROLD128", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPROLD256", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPROLD512", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -726,6 +726,10 @@ func simdGenericOps() []opData {
|
|||
{name: "PermuteFloat32x16", argLength: 2, commutative: false},
|
||||
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
|
||||
{name: "PermuteGroupedInt8x32", argLength: 2, commutative: false},
|
||||
{name: "PermuteGroupedInt8x64", argLength: 2, commutative: false},
|
||||
{name: "PermuteGroupedUint8x32", argLength: 2, commutative: false},
|
||||
{name: "PermuteGroupedUint8x64", argLength: 2, commutative: false},
|
||||
{name: "PermuteInt8x16", argLength: 2, commutative: false},
|
||||
{name: "PermuteInt8x32", argLength: 2, commutative: false},
|
||||
{name: "PermuteInt8x64", argLength: 2, commutative: false},
|
||||
|
|
@ -1089,6 +1093,28 @@ func simdGenericOps() []opData {
|
|||
{name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantGroupedInt32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantGroupedInt32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantGroupedUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantGroupedUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiGroupedInt16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiGroupedInt16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiGroupedUint16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiGroupedUint16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantHiUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoGroupedInt16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoGroupedInt16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoGroupedUint16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoGroupedUint16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantLoUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "PermuteConstantUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RotateAllLeftInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RotateAllLeftInt32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
|
|
|
|||
|
|
@ -2039,7 +2039,11 @@ const (
|
|||
OpAMD64VPSHRDVWMasked256
|
||||
OpAMD64VPSHRDVWMasked512
|
||||
OpAMD64VPSHUFB128
|
||||
OpAMD64VPSHUFB256
|
||||
OpAMD64VPSHUFB512
|
||||
OpAMD64VPSHUFBMasked128
|
||||
OpAMD64VPSHUFBMasked256
|
||||
OpAMD64VPSHUFBMasked512
|
||||
OpAMD64VPSIGNB128
|
||||
OpAMD64VPSIGNB256
|
||||
OpAMD64VPSIGND128
|
||||
|
|
@ -2364,6 +2368,18 @@ const (
|
|||
OpAMD64VPCMPW512
|
||||
OpAMD64VPCMPD512
|
||||
OpAMD64VPCMPQ512
|
||||
OpAMD64VPSHUFD128
|
||||
OpAMD64VPSHUFD256
|
||||
OpAMD64VPSHUFD512
|
||||
OpAMD64VPSHUFDMasked256
|
||||
OpAMD64VPSHUFDMasked512
|
||||
OpAMD64VPSHUFHW128
|
||||
OpAMD64VPSHUFHW256
|
||||
OpAMD64VPSHUFHW512
|
||||
OpAMD64VPSHUFHWMasked256
|
||||
OpAMD64VPSHUFHWMasked512
|
||||
OpAMD64VPSHUFHWMasked128
|
||||
OpAMD64VPSHUFDMasked128
|
||||
OpAMD64VPROLD128
|
||||
OpAMD64VPROLD256
|
||||
OpAMD64VPROLD512
|
||||
|
|
@ -5505,6 +5521,10 @@ const (
|
|||
OpPermuteFloat32x16
|
||||
OpPermuteFloat64x4
|
||||
OpPermuteFloat64x8
|
||||
OpPermuteGroupedInt8x32
|
||||
OpPermuteGroupedInt8x64
|
||||
OpPermuteGroupedUint8x32
|
||||
OpPermuteGroupedUint8x64
|
||||
OpPermuteInt8x16
|
||||
OpPermuteInt8x32
|
||||
OpPermuteInt8x64
|
||||
|
|
@ -5868,6 +5888,28 @@ const (
|
|||
OpGetElemUint16x8
|
||||
OpGetElemUint32x4
|
||||
OpGetElemUint64x2
|
||||
OpPermuteConstantGroupedInt32x8
|
||||
OpPermuteConstantGroupedInt32x16
|
||||
OpPermuteConstantGroupedUint32x8
|
||||
OpPermuteConstantGroupedUint32x16
|
||||
OpPermuteConstantHiGroupedInt16x16
|
||||
OpPermuteConstantHiGroupedInt16x32
|
||||
OpPermuteConstantHiGroupedUint16x16
|
||||
OpPermuteConstantHiGroupedUint16x32
|
||||
OpPermuteConstantHiInt16x8
|
||||
OpPermuteConstantHiInt32x4
|
||||
OpPermuteConstantHiUint16x8
|
||||
OpPermuteConstantHiUint32x4
|
||||
OpPermuteConstantInt32x4
|
||||
OpPermuteConstantLoGroupedInt16x16
|
||||
OpPermuteConstantLoGroupedInt16x32
|
||||
OpPermuteConstantLoGroupedUint16x16
|
||||
OpPermuteConstantLoGroupedUint16x32
|
||||
OpPermuteConstantLoInt16x8
|
||||
OpPermuteConstantLoInt32x4
|
||||
OpPermuteConstantLoUint16x8
|
||||
OpPermuteConstantLoUint32x4
|
||||
OpPermuteConstantUint32x4
|
||||
OpRotateAllLeftInt32x4
|
||||
OpRotateAllLeftInt32x8
|
||||
OpRotateAllLeftInt32x16
|
||||
|
|
@ -31031,6 +31073,34 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFB256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFB512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFBMasked128",
|
||||
argLen: 3,
|
||||
|
|
@ -31046,6 +31116,36 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFBMasked256",
|
||||
argLen: 3,
|
||||
asm: x86.AVPSHUFB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFBMasked512",
|
||||
argLen: 3,
|
||||
asm: x86.AVPSHUFB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSIGNB128",
|
||||
argLen: 2,
|
||||
|
|
@ -35810,6 +35910,180 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFD128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
asm: x86.AVPSHUFD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFD256",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
asm: x86.AVPSHUFD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFD512",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
asm: x86.AVPSHUFD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFDMasked256",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFDMasked512",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFHW128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
asm: x86.AVPSHUFHW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFHW256",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
asm: x86.AVPSHUFHW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFHW512",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
asm: x86.AVPSHUFHW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFHWMasked256",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFHW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFHWMasked512",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFHW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFHWMasked128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFHW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPSHUFDMasked128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPSHUFD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPROLD128",
|
||||
auxType: auxUInt8,
|
||||
|
|
@ -69053,6 +69327,26 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteGroupedInt8x32",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteGroupedInt8x64",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteGroupedUint8x32",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteGroupedUint8x64",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteInt8x16",
|
||||
argLen: 2,
|
||||
|
|
@ -70932,6 +71226,138 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantGroupedInt32x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantGroupedInt32x16",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantGroupedUint32x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantGroupedUint32x16",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiGroupedInt16x16",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiGroupedInt16x32",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiGroupedUint16x16",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiGroupedUint16x32",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiInt16x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiInt32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiUint16x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantHiUint32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantInt32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoGroupedInt16x16",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoGroupedInt16x32",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoGroupedUint16x16",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoGroupedUint16x32",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoInt16x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoInt32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoUint16x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantLoUint32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "PermuteConstantUint32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "RotateAllLeftInt32x4",
|
||||
auxType: auxUInt8,
|
||||
|
|
|
|||
|
|
@ -3223,6 +3223,72 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpPermute2Uint8x64:
|
||||
v.Op = OpAMD64VPERMI2B512
|
||||
return true
|
||||
case OpPermuteConstantGroupedInt32x16:
|
||||
v.Op = OpAMD64VPSHUFD512
|
||||
return true
|
||||
case OpPermuteConstantGroupedInt32x8:
|
||||
v.Op = OpAMD64VPSHUFD256
|
||||
return true
|
||||
case OpPermuteConstantGroupedUint32x16:
|
||||
v.Op = OpAMD64VPSHUFD512
|
||||
return true
|
||||
case OpPermuteConstantGroupedUint32x8:
|
||||
v.Op = OpAMD64VPSHUFD256
|
||||
return true
|
||||
case OpPermuteConstantHiGroupedInt16x16:
|
||||
v.Op = OpAMD64VPSHUFHW256
|
||||
return true
|
||||
case OpPermuteConstantHiGroupedInt16x32:
|
||||
v.Op = OpAMD64VPSHUFHW512
|
||||
return true
|
||||
case OpPermuteConstantHiGroupedUint16x16:
|
||||
v.Op = OpAMD64VPSHUFHW256
|
||||
return true
|
||||
case OpPermuteConstantHiGroupedUint16x32:
|
||||
v.Op = OpAMD64VPSHUFHW512
|
||||
return true
|
||||
case OpPermuteConstantHiInt16x8:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantHiInt32x4:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantHiUint16x8:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantHiUint32x4:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantInt32x4:
|
||||
v.Op = OpAMD64VPSHUFD128
|
||||
return true
|
||||
case OpPermuteConstantLoGroupedInt16x16:
|
||||
v.Op = OpAMD64VPSHUFHW256
|
||||
return true
|
||||
case OpPermuteConstantLoGroupedInt16x32:
|
||||
v.Op = OpAMD64VPSHUFHW512
|
||||
return true
|
||||
case OpPermuteConstantLoGroupedUint16x16:
|
||||
v.Op = OpAMD64VPSHUFHW256
|
||||
return true
|
||||
case OpPermuteConstantLoGroupedUint16x32:
|
||||
v.Op = OpAMD64VPSHUFHW512
|
||||
return true
|
||||
case OpPermuteConstantLoInt16x8:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantLoInt32x4:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantLoUint16x8:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantLoUint32x4:
|
||||
v.Op = OpAMD64VPSHUFHW128
|
||||
return true
|
||||
case OpPermuteConstantUint32x4:
|
||||
v.Op = OpAMD64VPSHUFD128
|
||||
return true
|
||||
case OpPermuteFloat32x16:
|
||||
v.Op = OpAMD64VPERMPS512
|
||||
return true
|
||||
|
|
@ -3235,6 +3301,18 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpPermuteFloat64x8:
|
||||
v.Op = OpAMD64VPERMPD512
|
||||
return true
|
||||
case OpPermuteGroupedInt8x32:
|
||||
v.Op = OpAMD64VPSHUFB256
|
||||
return true
|
||||
case OpPermuteGroupedInt8x64:
|
||||
v.Op = OpAMD64VPSHUFB512
|
||||
return true
|
||||
case OpPermuteGroupedUint8x32:
|
||||
v.Op = OpAMD64VPSHUFB256
|
||||
return true
|
||||
case OpPermuteGroupedUint8x64:
|
||||
v.Op = OpAMD64VPSHUFB512
|
||||
return true
|
||||
case OpPermuteInt16x16:
|
||||
v.Op = OpAMD64VPERMW256
|
||||
return true
|
||||
|
|
@ -26618,6 +26696,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
|
|||
v.AddArg4(x, y, z, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask)
|
||||
// result: (VPSHUFHWMasked512 [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSHUFHW512 {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSHUFHWMasked512)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPERMW512 x y) mask)
|
||||
// result: (VPERMWMasked512 x y mask)
|
||||
for {
|
||||
|
|
@ -27311,6 +27403,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
|||
v.AddArg4(x, y, z, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask)
|
||||
// result: (VPSHUFDMasked512 [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSHUFD512 {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSHUFDMasked512)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPERMPS512 x y) mask)
|
||||
// result: (VPERMPSMasked512 x y mask)
|
||||
for {
|
||||
|
|
@ -28610,6 +28716,19 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
|
|||
v.AddArg4(x, y, z, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPSHUFB512 x y) mask)
|
||||
// result: (VPSHUFBMasked512 x y mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSHUFB512 {
|
||||
break
|
||||
}
|
||||
y := v_0.Args[1]
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSHUFBMasked512)
|
||||
v.AddArg3(x, y, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU8Masked512 (VPERMB512 x y) mask)
|
||||
// result: (VPERMBMasked512 x y mask)
|
||||
for {
|
||||
|
|
|
|||
|
|
@ -794,6 +794,32 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.PermuteConstant", opLen1Imm8(ssa.OpPermuteConstantInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.PermuteConstant", opLen1Imm8(ssa.OpPermuteConstantUint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedInt32x8, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedInt32x16, types.TypeVec512, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedUint32x8, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedUint32x16, types.TypeVec512, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiInt16x8, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiUint16x8, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiUint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedInt16x16, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedInt16x32, types.TypeVec512, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedUint16x16, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedUint16x32, types.TypeVec512, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoInt16x8, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoUint16x8, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoUint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedInt16x16, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedInt16x32, types.TypeVec512, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedUint16x16, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedUint16x32, types.TypeVec512, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.PermuteGrouped", opLen2(ssa.OpPermuteGroupedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.PermuteGrouped", opLen2(ssa.OpPermuteGroupedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.PermuteGrouped", opLen2(ssa.OpPermuteGroupedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.PermuteGrouped", opLen2(ssa.OpPermuteGroupedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -75,3 +75,31 @@
|
|||
documentation: !string |-
|
||||
// NAME copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
- go: PermuteGrouped
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a grouped permutation of vector x using indices:
|
||||
- go: PermuteConstant
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a permutation of vector x using constant indices:
|
||||
- go: PermuteConstantGrouped
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a grouped permutation of vector x using constant indices:
|
||||
- go: PermuteConstantLo
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a permutation of vector x using constant indices:
|
||||
- go: PermuteConstantLoGrouped
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a grouped permutation of vector x using constant indices:
|
||||
- go: PermuteConstantHi
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a permutation of vector x using constant indices:
|
||||
- go: PermuteConstantHiGrouped
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a grouped permutation of vector x using constant indices:
|
||||
|
|
@ -433,3 +433,97 @@
|
|||
name: indices
|
||||
out:
|
||||
- *128any
|
||||
- go: PermuteGrouped
|
||||
asm: VPSHUFB
|
||||
addDoc: !string |-
|
||||
// result := {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
||||
// Only the needed bits to represent the index of a group of x are used in indices' elements.
|
||||
// However when the top bit is set, the low bits will be disregarded and the respective element in the result vector will be zeroed.
|
||||
// Each group is of size 128-bit.
|
||||
in:
|
||||
- &256Or512any
|
||||
bits: "256|512"
|
||||
go: $t
|
||||
- bits: "256|512"
|
||||
go: $t
|
||||
name: indices
|
||||
out:
|
||||
- *256Or512any
|
||||
|
||||
- go: PermuteConstant
|
||||
asm: VPSHUFD
|
||||
addDoc: !string |-
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are 2-bit unsigned index values packed together, e.g. indices[0:2] is the first index.
|
||||
in:
|
||||
- *128any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *128any
|
||||
- go: PermuteConstantGrouped
|
||||
asm: VPSHUFD
|
||||
addDoc: !string |-
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are 2-bit unsigned index values packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
in:
|
||||
- *256Or512any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *256Or512any
|
||||
|
||||
- go: PermuteConstantLo
|
||||
asm: VPSHUFHW
|
||||
addDoc: !string |-
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are 2-bit unsigned index values packed together, e.g. indices[0:2] is the first index.
|
||||
in:
|
||||
- *128any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *128any
|
||||
- go: PermuteConstantLoGrouped
|
||||
asm: VPSHUFHW
|
||||
addDoc: !string |-
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are 2-bit unsigned index values packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
in:
|
||||
- *256Or512any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *256Or512any
|
||||
|
||||
- go: PermuteConstantHi
|
||||
asm: VPSHUFHW
|
||||
addDoc: !string |-
|
||||
// result := {x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||
// Here indices are 2-bit unsigned index values packed together, e.g. indices[0:2] is the first index.
|
||||
in:
|
||||
- *128any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *128any
|
||||
- go: PermuteConstantHiGrouped
|
||||
asm: VPSHUFHW
|
||||
addDoc: !string |-
|
||||
// result := {x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4], x_group1[indices[0:2]+4], ...}
|
||||
// Here indices are 2-bit unsigned index values packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
in:
|
||||
- *256Or512any
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *256Or512any
|
||||
|
|
@ -4564,6 +4564,266 @@ func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8
|
|||
// Asm: VPERMI2Q, CPU Feature: AVX512
|
||||
func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
|
||||
|
||||
/* PermuteConstant */
|
||||
|
||||
// PermuteConstant performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFD, CPU Feature: AVX
|
||||
func (x Int32x4) PermuteConstant(indices uint8) Int32x4
|
||||
|
||||
// PermuteConstant performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFD, CPU Feature: AVX
|
||||
func (x Uint32x4) PermuteConstant(indices uint8) Uint32x4
|
||||
|
||||
/* PermuteConstantGrouped */
|
||||
|
||||
// PermuteConstantGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFD, CPU Feature: AVX2
|
||||
func (x Int32x8) PermuteConstantGrouped(indices uint8) Int32x8
|
||||
|
||||
// PermuteConstantGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFD, CPU Feature: AVX512
|
||||
func (x Int32x16) PermuteConstantGrouped(indices uint8) Int32x16
|
||||
|
||||
// PermuteConstantGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFD, CPU Feature: AVX2
|
||||
func (x Uint32x8) PermuteConstantGrouped(indices uint8) Uint32x8
|
||||
|
||||
// PermuteConstantGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFD, CPU Feature: AVX512
|
||||
func (x Uint32x16) PermuteConstantGrouped(indices uint8) Uint32x16
|
||||
|
||||
/* PermuteConstantHi */
|
||||
|
||||
// PermuteConstantHi performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Int16x8) PermuteConstantHi(indices uint8) Int16x8
|
||||
|
||||
// PermuteConstantHi performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX
|
||||
func (x Int32x4) PermuteConstantHi(indices uint8) Int32x4
|
||||
|
||||
// PermuteConstantHi performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Uint16x8) PermuteConstantHi(indices uint8) Uint16x8
|
||||
|
||||
// PermuteConstantHi performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX
|
||||
func (x Uint32x4) PermuteConstantHi(indices uint8) Uint32x4
|
||||
|
||||
/* PermuteConstantHiGrouped */
|
||||
|
||||
// PermuteConstantHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4], x_group1[indices[0:2]+4], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||
func (x Int16x16) PermuteConstantHiGrouped(indices uint8) Int16x16
|
||||
|
||||
// PermuteConstantHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4], x_group1[indices[0:2]+4], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Int16x32) PermuteConstantHiGrouped(indices uint8) Int16x32
|
||||
|
||||
// PermuteConstantHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4], x_group1[indices[0:2]+4], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||
func (x Uint16x16) PermuteConstantHiGrouped(indices uint8) Uint16x16
|
||||
|
||||
// PermuteConstantHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4], x_group1[indices[0:2]+4], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Uint16x32) PermuteConstantHiGrouped(indices uint8) Uint16x32
|
||||
|
||||
/* PermuteConstantLo */
|
||||
|
||||
// PermuteConstantLo performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Int16x8) PermuteConstantLo(indices uint8) Int16x8
|
||||
|
||||
// PermuteConstantLo performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX
|
||||
func (x Int32x4) PermuteConstantLo(indices uint8) Int32x4
|
||||
|
||||
// PermuteConstantLo performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Uint16x8) PermuteConstantLo(indices uint8) Uint16x8
|
||||
|
||||
// PermuteConstantLo performs a permutation of vector x using constant indices:
|
||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX
|
||||
func (x Uint32x4) PermuteConstantLo(indices uint8) Uint32x4
|
||||
|
||||
/* PermuteConstantLoGrouped */
|
||||
|
||||
// PermuteConstantLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||
func (x Int16x16) PermuteConstantLoGrouped(indices uint8) Int16x16
|
||||
|
||||
// PermuteConstantLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Int16x32) PermuteConstantLoGrouped(indices uint8) Int16x32
|
||||
|
||||
// PermuteConstantLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||
func (x Uint16x16) PermuteConstantLoGrouped(indices uint8) Uint16x16
|
||||
|
||||
// PermuteConstantLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||
func (x Uint16x32) PermuteConstantLoGrouped(indices uint8) Uint16x32
|
||||
|
||||
/* PermuteGrouped */
|
||||
|
||||
// PermuteGrouped performs a grouped permutation of vector x using indices:
|
||||
// result := {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
||||
// Only the needed bits to represent the index of a group of x are used in indices' elements.
|
||||
// However when the top bit is set, the low bits will be disregarded and the respective element in the result vector will be zeroed.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// Asm: VPSHUFB, CPU Feature: AVX2
|
||||
func (x Int8x32) PermuteGrouped(indices Int8x32) Int8x32
|
||||
|
||||
// PermuteGrouped performs a grouped permutation of vector x using indices:
|
||||
// result := {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
||||
// Only the needed bits to represent the index of a group of x are used in indices' elements.
|
||||
// However when the top bit is set, the low bits will be disregarded and the respective element in the result vector will be zeroed.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// Asm: VPSHUFB, CPU Feature: AVX512
|
||||
func (x Int8x64) PermuteGrouped(indices Int8x64) Int8x64
|
||||
|
||||
// PermuteGrouped performs a grouped permutation of vector x using indices:
|
||||
// result := {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
||||
// Only the needed bits to represent the index of a group of x are used in indices' elements.
|
||||
// However when the top bit is set, the low bits will be disregarded and the respective element in the result vector will be zeroed.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// Asm: VPSHUFB, CPU Feature: AVX2
|
||||
func (x Uint8x32) PermuteGrouped(indices Uint8x32) Uint8x32
|
||||
|
||||
// PermuteGrouped performs a grouped permutation of vector x using indices:
|
||||
// result := {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
||||
// Only the needed bits to represent the index of a group of x are used in indices' elements.
|
||||
// However when the top bit is set, the low bits will be disregarded and the respective element in the result vector will be zeroed.
|
||||
// Each group is of size 128-bit.
|
||||
//
|
||||
// Asm: VPSHUFB, CPU Feature: AVX512
|
||||
func (x Uint8x64) PermuteGrouped(indices Uint8x64) Uint8x64
|
||||
|
||||
/* Reciprocal */
|
||||
|
||||
// Reciprocal computes an approximate reciprocal of each element.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue