mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd, cmd/compile: add Interleave{Hi,Lo} (VPUNPCK*)
These are building blocks for transpose; their best names are not yet settled.

Change-Id: I3800a55de9fa7fde2590ca822894c8a75387dec3
Reviewed-on: https://go-review.googlesource.com/c/go/+/698576
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
6890aa2e20
commit
b509516b2e
12 changed files with 1021 additions and 2 deletions
|
|
@ -243,6 +243,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPCMPGTD256,
|
||||
ssa.OpAMD64VPCMPGTQ128,
|
||||
ssa.OpAMD64VPCMPGTQ256,
|
||||
ssa.OpAMD64VPUNPCKHWD128,
|
||||
ssa.OpAMD64VPUNPCKHDQ128,
|
||||
ssa.OpAMD64VPUNPCKHQDQ128,
|
||||
ssa.OpAMD64VPUNPCKHWD256,
|
||||
ssa.OpAMD64VPUNPCKHWD512,
|
||||
ssa.OpAMD64VPUNPCKHDQ256,
|
||||
ssa.OpAMD64VPUNPCKHDQ512,
|
||||
ssa.OpAMD64VPUNPCKHQDQ256,
|
||||
ssa.OpAMD64VPUNPCKHQDQ512,
|
||||
ssa.OpAMD64VPUNPCKLWD128,
|
||||
ssa.OpAMD64VPUNPCKLDQ128,
|
||||
ssa.OpAMD64VPUNPCKLQDQ128,
|
||||
ssa.OpAMD64VPUNPCKLWD256,
|
||||
ssa.OpAMD64VPUNPCKLWD512,
|
||||
ssa.OpAMD64VPUNPCKLDQ256,
|
||||
ssa.OpAMD64VPUNPCKLDQ512,
|
||||
ssa.OpAMD64VPUNPCKLQDQ256,
|
||||
ssa.OpAMD64VPUNPCKLQDQ512,
|
||||
ssa.OpAMD64VMAXPS128,
|
||||
ssa.OpAMD64VMAXPS256,
|
||||
ssa.OpAMD64VMAXPS512,
|
||||
|
|
|
|||
|
|
@ -520,6 +520,42 @@
|
|||
(GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
|
||||
(GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
|
||||
(GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
|
||||
(InterleaveHiInt16x8 ...) => (VPUNPCKHWD128 ...)
|
||||
(InterleaveHiInt32x4 ...) => (VPUNPCKHDQ128 ...)
|
||||
(InterleaveHiInt64x2 ...) => (VPUNPCKHQDQ128 ...)
|
||||
(InterleaveHiUint16x8 ...) => (VPUNPCKHWD128 ...)
|
||||
(InterleaveHiUint32x4 ...) => (VPUNPCKHDQ128 ...)
|
||||
(InterleaveHiUint64x2 ...) => (VPUNPCKHQDQ128 ...)
|
||||
(InterleaveHiGroupedInt16x16 ...) => (VPUNPCKHWD256 ...)
|
||||
(InterleaveHiGroupedInt16x32 ...) => (VPUNPCKHWD512 ...)
|
||||
(InterleaveHiGroupedInt32x8 ...) => (VPUNPCKHDQ256 ...)
|
||||
(InterleaveHiGroupedInt32x16 ...) => (VPUNPCKHDQ512 ...)
|
||||
(InterleaveHiGroupedInt64x4 ...) => (VPUNPCKHQDQ256 ...)
|
||||
(InterleaveHiGroupedInt64x8 ...) => (VPUNPCKHQDQ512 ...)
|
||||
(InterleaveHiGroupedUint16x16 ...) => (VPUNPCKHWD256 ...)
|
||||
(InterleaveHiGroupedUint16x32 ...) => (VPUNPCKHWD512 ...)
|
||||
(InterleaveHiGroupedUint32x8 ...) => (VPUNPCKHDQ256 ...)
|
||||
(InterleaveHiGroupedUint32x16 ...) => (VPUNPCKHDQ512 ...)
|
||||
(InterleaveHiGroupedUint64x4 ...) => (VPUNPCKHQDQ256 ...)
|
||||
(InterleaveHiGroupedUint64x8 ...) => (VPUNPCKHQDQ512 ...)
|
||||
(InterleaveLoInt16x8 ...) => (VPUNPCKLWD128 ...)
|
||||
(InterleaveLoInt32x4 ...) => (VPUNPCKLDQ128 ...)
|
||||
(InterleaveLoInt64x2 ...) => (VPUNPCKLQDQ128 ...)
|
||||
(InterleaveLoUint16x8 ...) => (VPUNPCKLWD128 ...)
|
||||
(InterleaveLoUint32x4 ...) => (VPUNPCKLDQ128 ...)
|
||||
(InterleaveLoUint64x2 ...) => (VPUNPCKLQDQ128 ...)
|
||||
(InterleaveLoGroupedInt16x16 ...) => (VPUNPCKLWD256 ...)
|
||||
(InterleaveLoGroupedInt16x32 ...) => (VPUNPCKLWD512 ...)
|
||||
(InterleaveLoGroupedInt32x8 ...) => (VPUNPCKLDQ256 ...)
|
||||
(InterleaveLoGroupedInt32x16 ...) => (VPUNPCKLDQ512 ...)
|
||||
(InterleaveLoGroupedInt64x4 ...) => (VPUNPCKLQDQ256 ...)
|
||||
(InterleaveLoGroupedInt64x8 ...) => (VPUNPCKLQDQ512 ...)
|
||||
(InterleaveLoGroupedUint16x16 ...) => (VPUNPCKLWD256 ...)
|
||||
(InterleaveLoGroupedUint16x32 ...) => (VPUNPCKLWD512 ...)
|
||||
(InterleaveLoGroupedUint32x8 ...) => (VPUNPCKLDQ256 ...)
|
||||
(InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...)
|
||||
(InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...)
|
||||
(InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...)
|
||||
(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
|
||||
(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
|
||||
(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
|
||||
|
|
|
|||
|
|
@ -983,6 +983,24 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPSUBWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSUBWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSUBWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPUNPCKHDQ128", argLength: 2, reg: v21, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPUNPCKHDQ256", argLength: 2, reg: v21, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPUNPCKHDQ512", argLength: 2, reg: w21, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPUNPCKHQDQ128", argLength: 2, reg: v21, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPUNPCKHQDQ256", argLength: 2, reg: v21, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPUNPCKHQDQ512", argLength: 2, reg: w21, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPUNPCKHWD128", argLength: 2, reg: v21, asm: "VPUNPCKHWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPUNPCKHWD256", argLength: 2, reg: v21, asm: "VPUNPCKHWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPUNPCKHWD512", argLength: 2, reg: w21, asm: "VPUNPCKHWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPUNPCKLDQ128", argLength: 2, reg: v21, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPUNPCKLDQ256", argLength: 2, reg: v21, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPUNPCKLDQ512", argLength: 2, reg: w21, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPUNPCKLQDQ128", argLength: 2, reg: v21, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPUNPCKLQDQ256", argLength: 2, reg: v21, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPUNPCKLQDQ512", argLength: 2, reg: w21, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPUNPCKLWD128", argLength: 2, reg: v21, asm: "VPUNPCKLWD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPUNPCKLWD256", argLength: 2, reg: v21, asm: "VPUNPCKLWD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPUNPCKLWD512", argLength: 2, reg: w21, asm: "VPUNPCKLWD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPXOR128", argLength: 2, reg: v21, asm: "VPXOR", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPXOR256", argLength: 2, reg: v21, asm: "VPXOR", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPXORD512", argLength: 2, reg: w21, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -484,6 +484,42 @@ func simdGenericOps() []opData {
|
|||
{name: "GreaterUint16x32", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint32x16", argLength: 2, commutative: false},
|
||||
{name: "GreaterUint64x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedInt16x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedInt16x32", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedInt32x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedInt32x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedInt64x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedInt64x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedUint16x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedUint16x32", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedUint32x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedUint32x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedUint64x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiGroupedUint64x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiInt16x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiInt32x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiInt64x2", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiUint16x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiUint32x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveHiUint64x2", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedInt16x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedInt16x32", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedInt32x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedInt32x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedInt64x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedInt64x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedUint16x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedUint16x32", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedUint32x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedUint32x16", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedUint64x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoGroupedUint64x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoInt16x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoInt32x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoInt64x2", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoUint16x8", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoUint32x4", argLength: 2, commutative: false},
|
||||
{name: "InterleaveLoUint64x2", argLength: 2, commutative: false},
|
||||
{name: "IsNanFloat32x4", argLength: 2, commutative: true},
|
||||
{name: "IsNanFloat32x8", argLength: 2, commutative: true},
|
||||
{name: "IsNanFloat32x16", argLength: 2, commutative: true},
|
||||
|
|
|
|||
|
|
@ -2215,6 +2215,24 @@ const (
|
|||
OpAMD64VPSUBWMasked128
|
||||
OpAMD64VPSUBWMasked256
|
||||
OpAMD64VPSUBWMasked512
|
||||
OpAMD64VPUNPCKHDQ128
|
||||
OpAMD64VPUNPCKHDQ256
|
||||
OpAMD64VPUNPCKHDQ512
|
||||
OpAMD64VPUNPCKHQDQ128
|
||||
OpAMD64VPUNPCKHQDQ256
|
||||
OpAMD64VPUNPCKHQDQ512
|
||||
OpAMD64VPUNPCKHWD128
|
||||
OpAMD64VPUNPCKHWD256
|
||||
OpAMD64VPUNPCKHWD512
|
||||
OpAMD64VPUNPCKLDQ128
|
||||
OpAMD64VPUNPCKLDQ256
|
||||
OpAMD64VPUNPCKLDQ512
|
||||
OpAMD64VPUNPCKLQDQ128
|
||||
OpAMD64VPUNPCKLQDQ256
|
||||
OpAMD64VPUNPCKLQDQ512
|
||||
OpAMD64VPUNPCKLWD128
|
||||
OpAMD64VPUNPCKLWD256
|
||||
OpAMD64VPUNPCKLWD512
|
||||
OpAMD64VPXOR128
|
||||
OpAMD64VPXOR256
|
||||
OpAMD64VPXORD512
|
||||
|
|
@ -5288,6 +5306,42 @@ const (
|
|||
OpGreaterUint16x32
|
||||
OpGreaterUint32x16
|
||||
OpGreaterUint64x8
|
||||
OpInterleaveHiGroupedInt16x16
|
||||
OpInterleaveHiGroupedInt16x32
|
||||
OpInterleaveHiGroupedInt32x8
|
||||
OpInterleaveHiGroupedInt32x16
|
||||
OpInterleaveHiGroupedInt64x4
|
||||
OpInterleaveHiGroupedInt64x8
|
||||
OpInterleaveHiGroupedUint16x16
|
||||
OpInterleaveHiGroupedUint16x32
|
||||
OpInterleaveHiGroupedUint32x8
|
||||
OpInterleaveHiGroupedUint32x16
|
||||
OpInterleaveHiGroupedUint64x4
|
||||
OpInterleaveHiGroupedUint64x8
|
||||
OpInterleaveHiInt16x8
|
||||
OpInterleaveHiInt32x4
|
||||
OpInterleaveHiInt64x2
|
||||
OpInterleaveHiUint16x8
|
||||
OpInterleaveHiUint32x4
|
||||
OpInterleaveHiUint64x2
|
||||
OpInterleaveLoGroupedInt16x16
|
||||
OpInterleaveLoGroupedInt16x32
|
||||
OpInterleaveLoGroupedInt32x8
|
||||
OpInterleaveLoGroupedInt32x16
|
||||
OpInterleaveLoGroupedInt64x4
|
||||
OpInterleaveLoGroupedInt64x8
|
||||
OpInterleaveLoGroupedUint16x16
|
||||
OpInterleaveLoGroupedUint16x32
|
||||
OpInterleaveLoGroupedUint32x8
|
||||
OpInterleaveLoGroupedUint32x16
|
||||
OpInterleaveLoGroupedUint64x4
|
||||
OpInterleaveLoGroupedUint64x8
|
||||
OpInterleaveLoInt16x8
|
||||
OpInterleaveLoInt32x4
|
||||
OpInterleaveLoInt64x2
|
||||
OpInterleaveLoUint16x8
|
||||
OpInterleaveLoUint32x4
|
||||
OpInterleaveLoUint64x2
|
||||
OpIsNanFloat32x4
|
||||
OpIsNanFloat32x8
|
||||
OpIsNanFloat32x16
|
||||
|
|
@ -33629,6 +33683,258 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHDQ128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHDQ256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHDQ512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHQDQ128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHQDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHQDQ256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHQDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHQDQ512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHQDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHWD128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHWD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHWD256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHWD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKHWD512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKHWD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLDQ128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLDQ256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLDQ512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLQDQ128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLQDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLQDQ256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLQDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLQDQ512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLQDQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLWD128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLWD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLWD256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLWD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPUNPCKLWD512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPUNPCKLWD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPXOR128",
|
||||
argLen: 2,
|
||||
|
|
@ -68116,6 +68422,186 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedInt16x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedInt16x32",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedInt32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedInt32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedInt64x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedInt64x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedUint16x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedUint16x32",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedUint32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedUint32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedUint64x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiGroupedUint64x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiInt16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiInt64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiUint16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiUint32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveHiUint64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedInt16x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedInt16x32",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedInt32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedInt32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedInt64x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedInt64x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedUint16x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedUint16x32",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedUint32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedUint32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedUint64x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoGroupedUint64x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoInt16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoInt64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoUint16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoUint32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "InterleaveLoUint64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "IsNanFloat32x4",
|
||||
argLen: 2,
|
||||
|
|
|
|||
|
|
@ -2363,6 +2363,114 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpInterCall:
|
||||
v.Op = OpAMD64CALLinter
|
||||
return true
|
||||
case OpInterleaveHiGroupedInt16x16:
|
||||
v.Op = OpAMD64VPUNPCKHWD256
|
||||
return true
|
||||
case OpInterleaveHiGroupedInt16x32:
|
||||
v.Op = OpAMD64VPUNPCKHWD512
|
||||
return true
|
||||
case OpInterleaveHiGroupedInt32x16:
|
||||
v.Op = OpAMD64VPUNPCKHDQ512
|
||||
return true
|
||||
case OpInterleaveHiGroupedInt32x8:
|
||||
v.Op = OpAMD64VPUNPCKHDQ256
|
||||
return true
|
||||
case OpInterleaveHiGroupedInt64x4:
|
||||
v.Op = OpAMD64VPUNPCKHQDQ256
|
||||
return true
|
||||
case OpInterleaveHiGroupedInt64x8:
|
||||
v.Op = OpAMD64VPUNPCKHQDQ512
|
||||
return true
|
||||
case OpInterleaveHiGroupedUint16x16:
|
||||
v.Op = OpAMD64VPUNPCKHWD256
|
||||
return true
|
||||
case OpInterleaveHiGroupedUint16x32:
|
||||
v.Op = OpAMD64VPUNPCKHWD512
|
||||
return true
|
||||
case OpInterleaveHiGroupedUint32x16:
|
||||
v.Op = OpAMD64VPUNPCKHDQ512
|
||||
return true
|
||||
case OpInterleaveHiGroupedUint32x8:
|
||||
v.Op = OpAMD64VPUNPCKHDQ256
|
||||
return true
|
||||
case OpInterleaveHiGroupedUint64x4:
|
||||
v.Op = OpAMD64VPUNPCKHQDQ256
|
||||
return true
|
||||
case OpInterleaveHiGroupedUint64x8:
|
||||
v.Op = OpAMD64VPUNPCKHQDQ512
|
||||
return true
|
||||
case OpInterleaveHiInt16x8:
|
||||
v.Op = OpAMD64VPUNPCKHWD128
|
||||
return true
|
||||
case OpInterleaveHiInt32x4:
|
||||
v.Op = OpAMD64VPUNPCKHDQ128
|
||||
return true
|
||||
case OpInterleaveHiInt64x2:
|
||||
v.Op = OpAMD64VPUNPCKHQDQ128
|
||||
return true
|
||||
case OpInterleaveHiUint16x8:
|
||||
v.Op = OpAMD64VPUNPCKHWD128
|
||||
return true
|
||||
case OpInterleaveHiUint32x4:
|
||||
v.Op = OpAMD64VPUNPCKHDQ128
|
||||
return true
|
||||
case OpInterleaveHiUint64x2:
|
||||
v.Op = OpAMD64VPUNPCKHQDQ128
|
||||
return true
|
||||
case OpInterleaveLoGroupedInt16x16:
|
||||
v.Op = OpAMD64VPUNPCKLWD256
|
||||
return true
|
||||
case OpInterleaveLoGroupedInt16x32:
|
||||
v.Op = OpAMD64VPUNPCKLWD512
|
||||
return true
|
||||
case OpInterleaveLoGroupedInt32x16:
|
||||
v.Op = OpAMD64VPUNPCKLDQ512
|
||||
return true
|
||||
case OpInterleaveLoGroupedInt32x8:
|
||||
v.Op = OpAMD64VPUNPCKLDQ256
|
||||
return true
|
||||
case OpInterleaveLoGroupedInt64x4:
|
||||
v.Op = OpAMD64VPUNPCKLQDQ256
|
||||
return true
|
||||
case OpInterleaveLoGroupedInt64x8:
|
||||
v.Op = OpAMD64VPUNPCKLQDQ512
|
||||
return true
|
||||
case OpInterleaveLoGroupedUint16x16:
|
||||
v.Op = OpAMD64VPUNPCKLWD256
|
||||
return true
|
||||
case OpInterleaveLoGroupedUint16x32:
|
||||
v.Op = OpAMD64VPUNPCKLWD512
|
||||
return true
|
||||
case OpInterleaveLoGroupedUint32x16:
|
||||
v.Op = OpAMD64VPUNPCKLDQ512
|
||||
return true
|
||||
case OpInterleaveLoGroupedUint32x8:
|
||||
v.Op = OpAMD64VPUNPCKLDQ256
|
||||
return true
|
||||
case OpInterleaveLoGroupedUint64x4:
|
||||
v.Op = OpAMD64VPUNPCKLQDQ256
|
||||
return true
|
||||
case OpInterleaveLoGroupedUint64x8:
|
||||
v.Op = OpAMD64VPUNPCKLQDQ512
|
||||
return true
|
||||
case OpInterleaveLoInt16x8:
|
||||
v.Op = OpAMD64VPUNPCKLWD128
|
||||
return true
|
||||
case OpInterleaveLoInt32x4:
|
||||
v.Op = OpAMD64VPUNPCKLDQ128
|
||||
return true
|
||||
case OpInterleaveLoInt64x2:
|
||||
v.Op = OpAMD64VPUNPCKLQDQ128
|
||||
return true
|
||||
case OpInterleaveLoUint16x8:
|
||||
v.Op = OpAMD64VPUNPCKLWD128
|
||||
return true
|
||||
case OpInterleaveLoUint32x4:
|
||||
v.Op = OpAMD64VPUNPCKLDQ128
|
||||
return true
|
||||
case OpInterleaveLoUint64x2:
|
||||
v.Op = OpAMD64VPUNPCKLQDQ128
|
||||
return true
|
||||
case OpIsInBounds:
|
||||
return rewriteValueAMD64_OpIsInBounds(v)
|
||||
case OpIsNanFloat32x16:
|
||||
|
|
|
|||
|
|
@ -532,6 +532,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.InterleaveHi", opLen2(ssa.OpInterleaveHiInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.InterleaveHi", opLen2(ssa.OpInterleaveHiInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.InterleaveHi", opLen2(ssa.OpInterleaveHiInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.InterleaveHi", opLen2(ssa.OpInterleaveHiUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.InterleaveHi", opLen2(ssa.OpInterleaveHiUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.InterleaveHi", opLen2(ssa.OpInterleaveHiUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.InterleaveHiGrouped", opLen2(ssa.OpInterleaveHiGroupedUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.InterleaveLo", opLen2(ssa.OpInterleaveLoInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.InterleaveLo", opLen2(ssa.OpInterleaveLoInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.InterleaveLo", opLen2(ssa.OpInterleaveLoInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.InterleaveLo", opLen2(ssa.OpInterleaveLoUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.InterleaveLo", opLen2(ssa.OpInterleaveLoUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.InterleaveLo", opLen2(ssa.OpInterleaveLoUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -102,4 +102,21 @@
|
|||
- go: PermuteConstantHiGrouped
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
// NAME performs a grouped permutation of vector x using constant indices:
|
||||
// NAME performs a grouped permutation of vector x using constant indices:
|
||||
- go: InterleaveHi
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME interleaves the elements of the high halves of x and y.
|
||||
- go: InterleaveLo
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME interleaves the elements of the low halves of x and y.
|
||||
- go: InterleaveHiGrouped
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
- go: InterleaveLoGrouped
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
|
||||
|
|
|
|||
|
|
@ -526,4 +526,41 @@
|
|||
immOffset: 0
|
||||
name: indices
|
||||
out:
|
||||
- *256Or512any
|
||||
- *256Or512any
|
||||
|
||||
- go: InterleaveHi
|
||||
asm: VPUNPCKH(QDQ|DQ|WD|WB)
|
||||
in:
|
||||
- *128any
|
||||
- *128any
|
||||
inVariant: []
|
||||
out:
|
||||
- *128any
|
||||
|
||||
- go: InterleaveLo
|
||||
asm: VPUNPCKL(QDQ|DQ|WD|WB)
|
||||
in:
|
||||
- *128any
|
||||
- *128any
|
||||
inVariant: []
|
||||
out:
|
||||
- *128any
|
||||
|
||||
- go: InterleaveHiGrouped
|
||||
asm: VPUNPCKH(QDQ|DQ|WD|WB)
|
||||
in:
|
||||
- *256Or512any
|
||||
- *256Or512any
|
||||
inVariant: []
|
||||
out:
|
||||
- *256Or512any
|
||||
|
||||
- go: InterleaveLoGrouped
|
||||
asm: VPUNPCKL(QDQ|DQ|WD|WB)
|
||||
in:
|
||||
- *256Or512any
|
||||
- *256Or512any
|
||||
inVariant: []
|
||||
out:
|
||||
- *256Or512any
|
||||
|
||||
|
|
|
|||
|
|
@ -494,3 +494,27 @@ func TestMaskOpt512(t *testing.T) {
|
|||
checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0})
|
||||
checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0})
|
||||
}
|
||||
|
||||
// flattenedTranspose transposes x and y, regarded as a pair of 2x2
|
||||
// matrices, but then flattens the rows in order, i.e.
|
||||
// x: ABCD ==> a: A1B2
|
||||
// y: 1234     b: C3D4
|
||||
func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) {
|
||||
return x.InterleaveLo(y), x.InterleaveHi(y)
|
||||
}
|
||||
|
||||
func TestFlattenedTranspose(t *testing.T) {
|
||||
r := make([]int32, 4, 4)
|
||||
s := make([]int32, 4, 4)
|
||||
|
||||
x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
|
||||
y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
|
||||
a, b := flattenedTranspose(x, y)
|
||||
|
||||
a.StoreSlice(r)
|
||||
b.StoreSlice(s)
|
||||
|
||||
checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2})
|
||||
checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3078,6 +3078,194 @@ func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
|
|||
// Asm: VPCMPUQ, CPU Feature: AVX512
|
||||
func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
|
||||
|
||||
/* InterleaveHi */
|
||||
|
||||
// InterleaveHi interleaves the elements of the high halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHWD, CPU Feature: AVX
|
||||
func (x Int16x8) InterleaveHi(y Int16x8) Int16x8
|
||||
|
||||
// InterleaveHi interleaves the elements of the high halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHDQ, CPU Feature: AVX
|
||||
func (x Int32x4) InterleaveHi(y Int32x4) Int32x4
|
||||
|
||||
// InterleaveHi interleaves the elements of the high halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHQDQ, CPU Feature: AVX
|
||||
func (x Int64x2) InterleaveHi(y Int64x2) Int64x2
|
||||
|
||||
// InterleaveHi interleaves the elements of the high halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHWD, CPU Feature: AVX
|
||||
func (x Uint16x8) InterleaveHi(y Uint16x8) Uint16x8
|
||||
|
||||
// InterleaveHi interleaves the elements of the high halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHDQ, CPU Feature: AVX
|
||||
func (x Uint32x4) InterleaveHi(y Uint32x4) Uint32x4
|
||||
|
||||
// InterleaveHi interleaves the elements of the high halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHQDQ, CPU Feature: AVX
|
||||
func (x Uint64x2) InterleaveHi(y Uint64x2) Uint64x2
|
||||
|
||||
/* InterleaveHiGrouped */
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHWD, CPU Feature: AVX2
|
||||
func (x Int16x16) InterleaveHiGrouped(y Int16x16) Int16x16
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHWD, CPU Feature: AVX512
|
||||
func (x Int16x32) InterleaveHiGrouped(y Int16x32) Int16x32
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHDQ, CPU Feature: AVX2
|
||||
func (x Int32x8) InterleaveHiGrouped(y Int32x8) Int32x8
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHDQ, CPU Feature: AVX512
|
||||
func (x Int32x16) InterleaveHiGrouped(y Int32x16) Int32x16
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHQDQ, CPU Feature: AVX2
|
||||
func (x Int64x4) InterleaveHiGrouped(y Int64x4) Int64x4
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHQDQ, CPU Feature: AVX512
|
||||
func (x Int64x8) InterleaveHiGrouped(y Int64x8) Int64x8
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHWD, CPU Feature: AVX2
|
||||
func (x Uint16x16) InterleaveHiGrouped(y Uint16x16) Uint16x16
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHWD, CPU Feature: AVX512
|
||||
func (x Uint16x32) InterleaveHiGrouped(y Uint16x32) Uint16x32
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHDQ, CPU Feature: AVX2
|
||||
func (x Uint32x8) InterleaveHiGrouped(y Uint32x8) Uint32x8
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHDQ, CPU Feature: AVX512
|
||||
func (x Uint32x16) InterleaveHiGrouped(y Uint32x16) Uint32x16
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHQDQ, CPU Feature: AVX2
|
||||
func (x Uint64x4) InterleaveHiGrouped(y Uint64x4) Uint64x4
|
||||
|
||||
// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKHQDQ, CPU Feature: AVX512
|
||||
func (x Uint64x8) InterleaveHiGrouped(y Uint64x8) Uint64x8
|
||||
|
||||
/* InterleaveLo */
|
||||
|
||||
// InterleaveLo interleaves the elements of the low halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLWD, CPU Feature: AVX
|
||||
func (x Int16x8) InterleaveLo(y Int16x8) Int16x8
|
||||
|
||||
// InterleaveLo interleaves the elements of the low halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLDQ, CPU Feature: AVX
|
||||
func (x Int32x4) InterleaveLo(y Int32x4) Int32x4
|
||||
|
||||
// InterleaveLo interleaves the elements of the low halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLQDQ, CPU Feature: AVX
|
||||
func (x Int64x2) InterleaveLo(y Int64x2) Int64x2
|
||||
|
||||
// InterleaveLo interleaves the elements of the low halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLWD, CPU Feature: AVX
|
||||
func (x Uint16x8) InterleaveLo(y Uint16x8) Uint16x8
|
||||
|
||||
// InterleaveLo interleaves the elements of the low halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLDQ, CPU Feature: AVX
|
||||
func (x Uint32x4) InterleaveLo(y Uint32x4) Uint32x4
|
||||
|
||||
// InterleaveLo interleaves the elements of the low halves of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLQDQ, CPU Feature: AVX
|
||||
func (x Uint64x2) InterleaveLo(y Uint64x2) Uint64x2
|
||||
|
||||
/* InterleaveLoGrouped */
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLWD, CPU Feature: AVX2
|
||||
func (x Int16x16) InterleaveLoGrouped(y Int16x16) Int16x16
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLWD, CPU Feature: AVX512
|
||||
func (x Int16x32) InterleaveLoGrouped(y Int16x32) Int16x32
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLDQ, CPU Feature: AVX2
|
||||
func (x Int32x8) InterleaveLoGrouped(y Int32x8) Int32x8
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLDQ, CPU Feature: AVX512
|
||||
func (x Int32x16) InterleaveLoGrouped(y Int32x16) Int32x16
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLQDQ, CPU Feature: AVX2
|
||||
func (x Int64x4) InterleaveLoGrouped(y Int64x4) Int64x4
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
|
||||
func (x Int64x8) InterleaveLoGrouped(y Int64x8) Int64x8
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLWD, CPU Feature: AVX2
|
||||
func (x Uint16x16) InterleaveLoGrouped(y Uint16x16) Uint16x16
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLWD, CPU Feature: AVX512
|
||||
func (x Uint16x32) InterleaveLoGrouped(y Uint16x32) Uint16x32
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLDQ, CPU Feature: AVX2
|
||||
func (x Uint32x8) InterleaveLoGrouped(y Uint32x8) Uint32x8
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLDQ, CPU Feature: AVX512
|
||||
func (x Uint32x16) InterleaveLoGrouped(y Uint32x16) Uint32x16
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLQDQ, CPU Feature: AVX2
|
||||
func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4
|
||||
|
||||
// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
|
||||
//
|
||||
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
|
||||
func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8
|
||||
|
||||
/* IsNan */
|
||||
|
||||
// IsNan checks if elements are NaN. Use as x.IsNan(x).
|
||||
|
|
|
|||
15
src/simd/shuffles_amd64.go
Normal file
15
src/simd/shuffles_amd64.go
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.simd && amd64
|
||||
|
||||
package simd
|
||||
|
||||
// FlattenedTranspose transposes x and y, regarded as a pair of 2x2
|
||||
// matrices, but then flattens the rows in order, i.e.
|
||||
// x: ABCD ==> a: A1B2
|
||||
// y: 1234     b: C3D4
|
||||
func (x Int32x4) FlattenedTranspose(y Int32x4) (a, b Int32x4) {
|
||||
return x.InterleaveLo(y), x.InterleaveHi(y)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue