Mirror of https://github.com/golang/go.git (synced 2025-12-08 06:10:04 +00:00)
[dev.simd] cmd/compile, simd: add Expand
This CL is generated by CL 693336.

Change-Id: Ic1712d49fcad0544fa3c19b0249d8bc65b347104
Reviewed-on: https://go-review.googlesource.com/c/go/+/693375
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
parent: d3cf582f8a
commit: 5b0ef7fcdc

9 changed files with 1332 additions and 0 deletions
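Before the generated diffs, a quick orientation: Expand takes a vector whose useful elements are packed at the low end and scatters them into the lanes selected by a mask, in order. A minimal scalar sketch of the semantics (an illustrative model written for this review, not code from the CL; expandRef is a hypothetical name):

func expandRef(x []int32, mask []bool) []int32 {
	// The i-th set lane of mask receives x[i], the i-th packed element;
	// lanes whose mask bit is unset are left zero.
	out := make([]int32, len(mask))
	next := 0 // index of the next packed element to consume from x
	for lane, m := range mask {
		if m {
			out[lane] = x[next]
			next++
		}
	}
	return out
}

For example, expandRef([]int32{3, 4, 0, 0}, []bool{false, true, false, true}) returns []int32{0, 3, 0, 4}, which is exactly what TestExpand at the bottom of this CL checks.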
@@ -644,6 +644,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VCVTPS2UDQMasked128,
 		ssa.OpAMD64VCVTPS2UDQMasked256,
 		ssa.OpAMD64VCVTPS2UDQMasked512,
+		ssa.OpAMD64VEXPANDPSMasked128,
+		ssa.OpAMD64VEXPANDPSMasked256,
+		ssa.OpAMD64VEXPANDPSMasked512,
+		ssa.OpAMD64VEXPANDPDMasked128,
+		ssa.OpAMD64VEXPANDPDMasked256,
+		ssa.OpAMD64VEXPANDPDMasked512,
+		ssa.OpAMD64VPEXPANDBMasked128,
+		ssa.OpAMD64VPEXPANDBMasked256,
+		ssa.OpAMD64VPEXPANDBMasked512,
+		ssa.OpAMD64VPEXPANDWMasked128,
+		ssa.OpAMD64VPEXPANDWMasked256,
+		ssa.OpAMD64VPEXPANDWMasked512,
+		ssa.OpAMD64VPEXPANDDMasked128,
+		ssa.OpAMD64VPEXPANDDMasked256,
+		ssa.OpAMD64VPEXPANDDMasked512,
+		ssa.OpAMD64VPEXPANDQMasked128,
+		ssa.OpAMD64VPEXPANDQMasked256,
+		ssa.OpAMD64VPEXPANDQMasked512,
 		ssa.OpAMD64VPOPCNTBMasked128,
 		ssa.OpAMD64VPOPCNTBMasked256,
 		ssa.OpAMD64VPOPCNTBMasked512,
@@ -1229,6 +1247,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VDIVPDMasked128,
 		ssa.OpAMD64VDIVPDMasked256,
 		ssa.OpAMD64VDIVPDMasked512,
+		ssa.OpAMD64VEXPANDPSMasked128,
+		ssa.OpAMD64VEXPANDPSMasked256,
+		ssa.OpAMD64VEXPANDPSMasked512,
+		ssa.OpAMD64VEXPANDPDMasked128,
+		ssa.OpAMD64VEXPANDPDMasked256,
+		ssa.OpAMD64VEXPANDPDMasked512,
+		ssa.OpAMD64VPEXPANDBMasked128,
+		ssa.OpAMD64VPEXPANDBMasked256,
+		ssa.OpAMD64VPEXPANDBMasked512,
+		ssa.OpAMD64VPEXPANDWMasked128,
+		ssa.OpAMD64VPEXPANDWMasked256,
+		ssa.OpAMD64VPEXPANDWMasked512,
+		ssa.OpAMD64VPEXPANDDMasked128,
+		ssa.OpAMD64VPEXPANDDMasked256,
+		ssa.OpAMD64VPEXPANDDMasked512,
+		ssa.OpAMD64VPEXPANDQMasked128,
+		ssa.OpAMD64VPEXPANDQMasked256,
+		ssa.OpAMD64VPEXPANDQMasked512,
 		ssa.OpAMD64VFMADD213PSMasked128,
 		ssa.OpAMD64VFMADD213PSMasked256,
 		ssa.OpAMD64VFMADD213PSMasked512,
@@ -385,6 +385,36 @@
 (EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
 (EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
 (EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
+(ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ExpandFloat64x2 x mask) => (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ExpandFloat64x4 x mask) => (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ExpandFloat64x8 x mask) => (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ExpandInt8x16 x mask) => (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(ExpandInt8x32 x mask) => (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(ExpandInt8x64 x mask) => (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(ExpandInt16x8 x mask) => (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ExpandInt16x16 x mask) => (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ExpandInt16x32 x mask) => (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ExpandInt32x4 x mask) => (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ExpandInt32x8 x mask) => (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ExpandInt32x16 x mask) => (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ExpandInt64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ExpandInt64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ExpandInt64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ExpandUint8x16 x mask) => (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(ExpandUint8x32 x mask) => (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(ExpandUint8x64 x mask) => (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(ExpandUint16x8 x mask) => (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ExpandUint16x16 x mask) => (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ExpandUint16x32 x mask) => (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ExpandUint32x4 x mask) => (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ExpandUint32x8 x mask) => (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ExpandUint32x16 x mask) => (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (FloorFloat32x4 x) => (VROUNDPS128 [1] x)
 (FloorFloat32x8 x) => (VROUNDPS256 [1] x)
 (FloorFloat64x2 x) => (VROUNDPD128 [1] x)
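All thirty rules share one lowering shape: the generic Expand op becomes the matching masked AVX-512 expand instruction (VEXPANDPS/VEXPANDPD for floats, VPEXPANDB/VPEXPANDW/VPEXPANDD/VPEXPANDQ for 8/16/32/64-bit integers), with the vector-typed mask first converted to a K register through the corresponding VPMOVVec*ToM op.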
@@ -49,6 +49,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VDIVPSMasked128", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VDIVPSMasked256", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VDIVPSMasked512", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VEXPANDPDMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VEXPANDPDMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VEXPANDPDMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VEXPANDPSMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VEXPANDPSMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VEXPANDPSMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
@@ -357,6 +363,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPERMWMasked128", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPERMWMasked256", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPERMWMasked512", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPEXPANDBMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPEXPANDBMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPEXPANDBMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPEXPANDDMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPEXPANDDMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPEXPANDDMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPEXPANDQMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPEXPANDQMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPEXPANDQMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPEXPANDWMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPEXPANDWMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPEXPANDWMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPHADDD128", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPHADDD256", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPHADDSW128", argLength: 2, reg: v21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -364,6 +364,36 @@ func simdGenericOps() []opData {
 		{name: "EqualUint64x2", argLength: 2, commutative: true},
 		{name: "EqualUint64x4", argLength: 2, commutative: true},
 		{name: "EqualUint64x8", argLength: 2, commutative: true},
+		{name: "ExpandFloat32x4", argLength: 2, commutative: false},
+		{name: "ExpandFloat32x8", argLength: 2, commutative: false},
+		{name: "ExpandFloat32x16", argLength: 2, commutative: false},
+		{name: "ExpandFloat64x2", argLength: 2, commutative: false},
+		{name: "ExpandFloat64x4", argLength: 2, commutative: false},
+		{name: "ExpandFloat64x8", argLength: 2, commutative: false},
+		{name: "ExpandInt8x16", argLength: 2, commutative: false},
+		{name: "ExpandInt8x32", argLength: 2, commutative: false},
+		{name: "ExpandInt8x64", argLength: 2, commutative: false},
+		{name: "ExpandInt16x8", argLength: 2, commutative: false},
+		{name: "ExpandInt16x16", argLength: 2, commutative: false},
+		{name: "ExpandInt16x32", argLength: 2, commutative: false},
+		{name: "ExpandInt32x4", argLength: 2, commutative: false},
+		{name: "ExpandInt32x8", argLength: 2, commutative: false},
+		{name: "ExpandInt32x16", argLength: 2, commutative: false},
+		{name: "ExpandInt64x2", argLength: 2, commutative: false},
+		{name: "ExpandInt64x4", argLength: 2, commutative: false},
+		{name: "ExpandInt64x8", argLength: 2, commutative: false},
+		{name: "ExpandUint8x16", argLength: 2, commutative: false},
+		{name: "ExpandUint8x32", argLength: 2, commutative: false},
+		{name: "ExpandUint8x64", argLength: 2, commutative: false},
+		{name: "ExpandUint16x8", argLength: 2, commutative: false},
+		{name: "ExpandUint16x16", argLength: 2, commutative: false},
+		{name: "ExpandUint16x32", argLength: 2, commutative: false},
+		{name: "ExpandUint32x4", argLength: 2, commutative: false},
+		{name: "ExpandUint32x8", argLength: 2, commutative: false},
+		{name: "ExpandUint32x16", argLength: 2, commutative: false},
+		{name: "ExpandUint64x2", argLength: 2, commutative: false},
+		{name: "ExpandUint64x4", argLength: 2, commutative: false},
+		{name: "ExpandUint64x8", argLength: 2, commutative: false},
 		{name: "FloorFloat32x4", argLength: 1, commutative: false},
 		{name: "FloorFloat32x8", argLength: 1, commutative: false},
 		{name: "FloorFloat64x2", argLength: 1, commutative: false},
@@ -1268,6 +1268,12 @@ const (
 	OpAMD64VDIVPSMasked128
 	OpAMD64VDIVPSMasked256
 	OpAMD64VDIVPSMasked512
+	OpAMD64VEXPANDPDMasked128
+	OpAMD64VEXPANDPDMasked256
+	OpAMD64VEXPANDPDMasked512
+	OpAMD64VEXPANDPSMasked128
+	OpAMD64VEXPANDPSMasked256
+	OpAMD64VEXPANDPSMasked512
 	OpAMD64VFMADD213PD128
 	OpAMD64VFMADD213PD256
 	OpAMD64VFMADD213PD512
@@ -1576,6 +1582,18 @@ const (
 	OpAMD64VPERMWMasked128
 	OpAMD64VPERMWMasked256
 	OpAMD64VPERMWMasked512
+	OpAMD64VPEXPANDBMasked128
+	OpAMD64VPEXPANDBMasked256
+	OpAMD64VPEXPANDBMasked512
+	OpAMD64VPEXPANDDMasked128
+	OpAMD64VPEXPANDDMasked256
+	OpAMD64VPEXPANDDMasked512
+	OpAMD64VPEXPANDQMasked128
+	OpAMD64VPEXPANDQMasked256
+	OpAMD64VPEXPANDQMasked512
+	OpAMD64VPEXPANDWMasked128
+	OpAMD64VPEXPANDWMasked256
+	OpAMD64VPEXPANDWMasked512
 	OpAMD64VPHADDD128
 	OpAMD64VPHADDD256
 	OpAMD64VPHADDSW128
@@ -4925,6 +4943,36 @@ const (
 	OpEqualUint64x2
 	OpEqualUint64x4
 	OpEqualUint64x8
+	OpExpandFloat32x4
+	OpExpandFloat32x8
+	OpExpandFloat32x16
+	OpExpandFloat64x2
+	OpExpandFloat64x4
+	OpExpandFloat64x8
+	OpExpandInt8x16
+	OpExpandInt8x32
+	OpExpandInt8x64
+	OpExpandInt16x8
+	OpExpandInt16x16
+	OpExpandInt16x32
+	OpExpandInt32x4
+	OpExpandInt32x8
+	OpExpandInt32x16
+	OpExpandInt64x2
+	OpExpandInt64x4
+	OpExpandInt64x8
+	OpExpandUint8x16
+	OpExpandUint8x32
+	OpExpandUint8x64
+	OpExpandUint16x8
+	OpExpandUint16x16
+	OpExpandUint16x32
+	OpExpandUint32x4
+	OpExpandUint32x8
+	OpExpandUint32x16
+	OpExpandUint64x2
+	OpExpandUint64x4
+	OpExpandUint64x8
 	OpFloorFloat32x4
 	OpFloorFloat32x8
 	OpFloorFloat64x2
@@ -20065,6 +20113,90 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "VEXPANDPDMasked128",
+		argLen: 2,
+		asm:    x86.AVEXPANDPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VEXPANDPDMasked256",
+		argLen: 2,
+		asm:    x86.AVEXPANDPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VEXPANDPDMasked512",
+		argLen: 2,
+		asm:    x86.AVEXPANDPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VEXPANDPSMasked128",
+		argLen: 2,
+		asm:    x86.AVEXPANDPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VEXPANDPSMasked256",
+		argLen: 2,
+		asm:    x86.AVEXPANDPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VEXPANDPSMasked512",
+		argLen: 2,
+		asm:    x86.AVEXPANDPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VFMADD213PD128",
 		argLen: 3,
@@ -24788,6 +24920,174 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "VPEXPANDBMasked128",
+		argLen: 2,
+		asm:    x86.AVPEXPANDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDBMasked256",
+		argLen: 2,
+		asm:    x86.AVPEXPANDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDBMasked512",
+		argLen: 2,
+		asm:    x86.AVPEXPANDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDDMasked128",
+		argLen: 2,
+		asm:    x86.AVPEXPANDD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDDMasked256",
+		argLen: 2,
+		asm:    x86.AVPEXPANDD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDDMasked512",
+		argLen: 2,
+		asm:    x86.AVPEXPANDD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDQMasked128",
+		argLen: 2,
+		asm:    x86.AVPEXPANDQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDQMasked256",
+		argLen: 2,
+		asm:    x86.AVPEXPANDQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDQMasked512",
+		argLen: 2,
+		asm:    x86.AVPEXPANDQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDWMasked128",
+		argLen: 2,
+		asm:    x86.AVPEXPANDW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDWMasked256",
+		argLen: 2,
+		asm:    x86.AVPEXPANDW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:   "VPEXPANDWMasked512",
+		argLen: 2,
+		asm:    x86.AVPEXPANDW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VPHADDD128",
 		argLen: 2,
@@ -64829,6 +65129,156 @@ var opcodeTable = [...]opInfo{
 		commutative: true,
 		generic:     true,
 	},
+	{
+		name:    "ExpandFloat32x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandFloat32x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandFloat32x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandFloat64x2",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandFloat64x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandFloat64x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt8x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt8x32",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt8x64",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt16x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt16x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt16x32",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt32x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt32x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt32x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt64x2",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt64x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandInt64x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint8x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint8x32",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint8x64",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint16x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint16x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint16x32",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint32x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint32x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint32x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint64x2",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint64x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "ExpandUint64x8",
+		argLen:  2,
+		generic: true,
+	},
 	{
 		name:   "FloorFloat32x4",
 		argLen: 1,
@@ -1754,6 +1754,66 @@ func rewriteValueAMD64(v *Value) bool {
 		return true
 	case OpEqualUint8x64:
 		return rewriteValueAMD64_OpEqualUint8x64(v)
+	case OpExpandFloat32x16:
+		return rewriteValueAMD64_OpExpandFloat32x16(v)
+	case OpExpandFloat32x4:
+		return rewriteValueAMD64_OpExpandFloat32x4(v)
+	case OpExpandFloat32x8:
+		return rewriteValueAMD64_OpExpandFloat32x8(v)
+	case OpExpandFloat64x2:
+		return rewriteValueAMD64_OpExpandFloat64x2(v)
+	case OpExpandFloat64x4:
+		return rewriteValueAMD64_OpExpandFloat64x4(v)
+	case OpExpandFloat64x8:
+		return rewriteValueAMD64_OpExpandFloat64x8(v)
+	case OpExpandInt16x16:
+		return rewriteValueAMD64_OpExpandInt16x16(v)
+	case OpExpandInt16x32:
+		return rewriteValueAMD64_OpExpandInt16x32(v)
+	case OpExpandInt16x8:
+		return rewriteValueAMD64_OpExpandInt16x8(v)
+	case OpExpandInt32x16:
+		return rewriteValueAMD64_OpExpandInt32x16(v)
+	case OpExpandInt32x4:
+		return rewriteValueAMD64_OpExpandInt32x4(v)
+	case OpExpandInt32x8:
+		return rewriteValueAMD64_OpExpandInt32x8(v)
+	case OpExpandInt64x2:
+		return rewriteValueAMD64_OpExpandInt64x2(v)
+	case OpExpandInt64x4:
+		return rewriteValueAMD64_OpExpandInt64x4(v)
+	case OpExpandInt64x8:
+		return rewriteValueAMD64_OpExpandInt64x8(v)
+	case OpExpandInt8x16:
+		return rewriteValueAMD64_OpExpandInt8x16(v)
+	case OpExpandInt8x32:
+		return rewriteValueAMD64_OpExpandInt8x32(v)
+	case OpExpandInt8x64:
+		return rewriteValueAMD64_OpExpandInt8x64(v)
+	case OpExpandUint16x16:
+		return rewriteValueAMD64_OpExpandUint16x16(v)
+	case OpExpandUint16x32:
+		return rewriteValueAMD64_OpExpandUint16x32(v)
+	case OpExpandUint16x8:
+		return rewriteValueAMD64_OpExpandUint16x8(v)
+	case OpExpandUint32x16:
+		return rewriteValueAMD64_OpExpandUint32x16(v)
+	case OpExpandUint32x4:
+		return rewriteValueAMD64_OpExpandUint32x4(v)
+	case OpExpandUint32x8:
+		return rewriteValueAMD64_OpExpandUint32x8(v)
+	case OpExpandUint64x2:
+		return rewriteValueAMD64_OpExpandUint64x2(v)
+	case OpExpandUint64x4:
+		return rewriteValueAMD64_OpExpandUint64x4(v)
+	case OpExpandUint64x8:
+		return rewriteValueAMD64_OpExpandUint64x8(v)
+	case OpExpandUint8x16:
+		return rewriteValueAMD64_OpExpandUint8x16(v)
+	case OpExpandUint8x32:
+		return rewriteValueAMD64_OpExpandUint8x32(v)
+	case OpExpandUint8x64:
+		return rewriteValueAMD64_OpExpandUint8x64(v)
 	case OpFMA:
 		return rewriteValueAMD64_OpFMA(v)
 	case OpFloor:
@@ -34479,6 +34539,486 @@ func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandFloat32x16 x mask)
+	// result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VEXPANDPSMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandFloat32x4 x mask)
+	// result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VEXPANDPSMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandFloat32x8 x mask)
+	// result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VEXPANDPSMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandFloat64x2 x mask)
+	// result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VEXPANDPDMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandFloat64x4 x mask)
+	// result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VEXPANDPDMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandFloat64x8 x mask)
+	// result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VEXPANDPDMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt16x16 x mask)
+	// result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDWMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt16x32 x mask)
+	// result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDWMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt16x8 x mask)
+	// result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDWMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt32x16 x mask)
+	// result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDDMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt32x4 x mask)
+	// result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDDMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt32x8 x mask)
+	// result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDDMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt64x2 x mask)
+	// result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDQMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt64x4 x mask)
+	// result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDQMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt64x8 x mask)
+	// result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDQMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt8x16 x mask)
+	// result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDBMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt8x32 x mask)
+	// result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDBMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandInt8x64 x mask)
+	// result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDBMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint16x16 x mask)
+	// result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDWMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint16x32 x mask)
+	// result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDWMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint16x8 x mask)
+	// result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDWMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint32x16 x mask)
+	// result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDDMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint32x4 x mask)
+	// result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDDMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint32x8 x mask)
+	// result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDDMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint64x2 x mask)
+	// result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDQMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint64x4 x mask)
+	// result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDQMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint64x8 x mask)
+	// result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDQMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint8x16 x mask)
+	// result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDBMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint8x32 x mask)
+	// result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDBMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ExpandUint8x64 x mask)
+	// result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+	for {
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VPEXPANDBMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
 func rewriteValueAMD64_OpFMA(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -396,6 +396,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.Expand", opLen2(ssa.OpExpandFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.Expand", opLen2(ssa.OpExpandFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.Expand", opLen2(ssa.OpExpandFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.Expand", opLen2(ssa.OpExpandInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.Expand", opLen2(ssa.OpExpandInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.Expand", opLen2(ssa.OpExpandInt8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int16x8.Expand", opLen2(ssa.OpExpandInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.Expand", opLen2(ssa.OpExpandInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.Expand", opLen2(ssa.OpExpandInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.Expand", opLen2(ssa.OpExpandInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.Expand", opLen2(ssa.OpExpandInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.Expand", opLen2(ssa.OpExpandInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.Expand", opLen2(ssa.OpExpandInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.Expand", opLen2(ssa.OpExpandInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.Expand", opLen2(ssa.OpExpandInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.Expand", opLen2(ssa.OpExpandUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.Expand", opLen2(ssa.OpExpandUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.Expand", opLen2(ssa.OpExpandUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.Expand", opLen2(ssa.OpExpandUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.Expand", opLen2(ssa.OpExpandUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.Expand", opLen2(ssa.OpExpandUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.Expand", opLen2(ssa.OpExpandUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.Expand", opLen2(ssa.OpExpandUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.Expand", opLen2(ssa.OpExpandUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
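The intrinsic wiring above makes Expand callable as an ordinary method on the simd vector types. A usage sketch, assuming a toolchain built from this dev.simd branch with the experimental simd package available (the calls mirror TestExpand at the bottom of this CL):

package main

import (
	"fmt"

	"simd" // experimental package; availability depends on the branch/experiment settings
)

func main() {
	if !simd.HasAVX512() {
		fmt.Println("masked expand needs AVX-512")
		return
	}
	x := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})   // elements packed at the low end
	m := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1}) // lanes 1 and 3 selected
	y := x.Expand(m.AsMask32x4())
	out := make([]int32, 4)
	y.StoreSlice(out)
	fmt.Println(out) // [0 3 0 4]
}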
@@ -2399,6 +2399,188 @@ func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
 // Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
 
+/* Expand */
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VEXPANDPS, CPU Feature: AVX512F
+func (x Float32x4) Expand(mask Mask32x4) Float32x4
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VEXPANDPS, CPU Feature: AVX512F
+func (x Float32x8) Expand(mask Mask32x8) Float32x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VEXPANDPS, CPU Feature: AVX512F
+func (x Float32x16) Expand(mask Mask32x16) Float32x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VEXPANDPD, CPU Feature: AVX512F
+func (x Float64x2) Expand(mask Mask64x2) Float64x2
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VEXPANDPD, CPU Feature: AVX512F
+func (x Float64x4) Expand(mask Mask64x4) Float64x4
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VEXPANDPD, CPU Feature: AVX512F
+func (x Float64x8) Expand(mask Mask64x8) Float64x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Int8x16) Expand(mask Mask8x16) Int8x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Int8x32) Expand(mask Mask8x32) Int8x32
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Int8x64) Expand(mask Mask8x64) Int8x64
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Int16x8) Expand(mask Mask16x8) Int16x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Int16x16) Expand(mask Mask16x16) Int16x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Int16x32) Expand(mask Mask16x32) Int16x32
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Int32x4) Expand(mask Mask32x4) Int32x4
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Int32x8) Expand(mask Mask32x8) Int32x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Int32x16) Expand(mask Mask32x16) Int32x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Int64x2) Expand(mask Mask64x2) Int64x2
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Int64x4) Expand(mask Mask64x4) Int64x4
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Int64x8) Expand(mask Mask64x8) Int64x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Uint8x16) Expand(mask Mask8x16) Uint8x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Uint8x32) Expand(mask Mask8x32) Uint8x32
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Uint8x64) Expand(mask Mask8x64) Uint8x64
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Uint16x8) Expand(mask Mask16x8) Uint16x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Uint16x16) Expand(mask Mask16x16) Uint16x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Uint16x32) Expand(mask Mask16x32) Uint16x32
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Uint32x4) Expand(mask Mask32x4) Uint32x4
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Uint32x8) Expand(mask Mask32x8) Uint32x8
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Uint32x16) Expand(mask Mask32x16) Uint32x16
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Uint64x2) Expand(mask Mask64x2) Uint64x2
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
+
+// Expand distributes the elements packed in the lower lanes of x to the
+// result lanes selected by mask, filling set mask lanes in order from
+// lowest to highest; unselected lanes are zeroed.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
+
 /* Floor */
 
 // Floor rounds elements down to the nearest integer.
@@ -187,6 +187,22 @@ func TestCompress(t *testing.T) {
 	}
 }
 
+func TestExpand(t *testing.T) {
+	if !simd.HasAVX512() {
+		t.Skip("Test requires HasAVX512, not available on this hardware")
+		return
+	}
+	v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
+	v0101 := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1})
+	v2400 := v3400.Expand(v0101.AsMask32x4())
+	got := make([]int32, 4)
+	v2400.StoreSlice(got)
+	want := []int32{0, 3, 0, 4}
+	if !slices.Equal(got, want) {
+		t.Errorf("want and got differ, want=%v, got=%v", want, got)
+	}
+}
+
 func TestPairDotProdAccumulate(t *testing.T) {
 	if !simd.HasAVX512GFNI() {
 		// TODO: this function is actually VNNI, let's implement and call the right check.
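Note the pairing with the existing Compress (exercised by TestCompress just above TestExpand): Compress gathers the mask-selected lanes of a vector down into its low lanes, while Expand performs the inverse scatter, distributing packed low elements back out to the mask-selected lanes and zeroing the rest.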