[dev.simd] cmd/compile, simd: add Expand

This CL is generated by CL 693336.

Change-Id: Ic1712d49fcad0544fa3c19b0249d8bc65b347104
Reviewed-on: https://go-review.googlesource.com/c/go/+/693375
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-08-05 19:42:12 +00:00
parent d3cf582f8a
commit 5b0ef7fcdc
9 changed files with 1332 additions and 0 deletions

View file

@ -644,6 +644,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked128,
ssa.OpAMD64VCVTPS2UDQMasked256, ssa.OpAMD64VCVTPS2UDQMasked256,
ssa.OpAMD64VCVTPS2UDQMasked512, ssa.OpAMD64VCVTPS2UDQMasked512,
ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512,
ssa.OpAMD64VEXPANDPDMasked128,
ssa.OpAMD64VEXPANDPDMasked256,
ssa.OpAMD64VEXPANDPDMasked512,
ssa.OpAMD64VPEXPANDBMasked128,
ssa.OpAMD64VPEXPANDBMasked256,
ssa.OpAMD64VPEXPANDBMasked512,
ssa.OpAMD64VPEXPANDWMasked128,
ssa.OpAMD64VPEXPANDWMasked256,
ssa.OpAMD64VPEXPANDWMasked512,
ssa.OpAMD64VPEXPANDDMasked128,
ssa.OpAMD64VPEXPANDDMasked256,
ssa.OpAMD64VPEXPANDDMasked512,
ssa.OpAMD64VPEXPANDQMasked128,
ssa.OpAMD64VPEXPANDQMasked256,
ssa.OpAMD64VPEXPANDQMasked512,
ssa.OpAMD64VPOPCNTBMasked128, ssa.OpAMD64VPOPCNTBMasked128,
ssa.OpAMD64VPOPCNTBMasked256, ssa.OpAMD64VPOPCNTBMasked256,
ssa.OpAMD64VPOPCNTBMasked512, ssa.OpAMD64VPOPCNTBMasked512,
@ -1229,6 +1247,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VDIVPDMasked128, ssa.OpAMD64VDIVPDMasked128,
ssa.OpAMD64VDIVPDMasked256, ssa.OpAMD64VDIVPDMasked256,
ssa.OpAMD64VDIVPDMasked512, ssa.OpAMD64VDIVPDMasked512,
ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512,
ssa.OpAMD64VEXPANDPDMasked128,
ssa.OpAMD64VEXPANDPDMasked256,
ssa.OpAMD64VEXPANDPDMasked512,
ssa.OpAMD64VPEXPANDBMasked128,
ssa.OpAMD64VPEXPANDBMasked256,
ssa.OpAMD64VPEXPANDBMasked512,
ssa.OpAMD64VPEXPANDWMasked128,
ssa.OpAMD64VPEXPANDWMasked256,
ssa.OpAMD64VPEXPANDWMasked512,
ssa.OpAMD64VPEXPANDDMasked128,
ssa.OpAMD64VPEXPANDDMasked256,
ssa.OpAMD64VPEXPANDDMasked512,
ssa.OpAMD64VPEXPANDQMasked128,
ssa.OpAMD64VPEXPANDQMasked256,
ssa.OpAMD64VPEXPANDQMasked512,
ssa.OpAMD64VFMADD213PSMasked128, ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VFMADD213PSMasked256, ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VFMADD213PSMasked512, ssa.OpAMD64VFMADD213PSMasked512,

View file

@ -385,6 +385,36 @@
(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask))) (EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask))) (EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask))) (EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
(ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(ExpandFloat64x2 x mask) => (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ExpandFloat64x4 x mask) => (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ExpandFloat64x8 x mask) => (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(ExpandInt8x16 x mask) => (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(ExpandInt8x32 x mask) => (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(ExpandInt8x64 x mask) => (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(ExpandInt16x8 x mask) => (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(ExpandInt16x16 x mask) => (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(ExpandInt16x32 x mask) => (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(ExpandInt32x4 x mask) => (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ExpandInt32x8 x mask) => (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ExpandInt32x16 x mask) => (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(ExpandInt64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ExpandInt64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ExpandInt64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(ExpandUint8x16 x mask) => (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(ExpandUint8x32 x mask) => (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(ExpandUint8x64 x mask) => (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(ExpandUint16x8 x mask) => (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(ExpandUint16x16 x mask) => (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(ExpandUint16x32 x mask) => (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(ExpandUint32x4 x mask) => (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ExpandUint32x8 x mask) => (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ExpandUint32x16 x mask) => (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(FloorFloat32x4 x) => (VROUNDPS128 [1] x) (FloorFloat32x4 x) => (VROUNDPS128 [1] x)
(FloorFloat32x8 x) => (VROUNDPS256 [1] x) (FloorFloat32x8 x) => (VROUNDPS256 [1] x)
(FloorFloat64x2 x) => (VROUNDPD128 [1] x) (FloorFloat64x2 x) => (VROUNDPD128 [1] x)

View file

@ -49,6 +49,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VDIVPSMasked128", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VDIVPSMasked128", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDIVPSMasked256", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VDIVPSMasked256", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VDIVPSMasked512", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VDIVPSMasked512", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VEXPANDPDMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VEXPANDPDMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VEXPANDPDMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VEXPANDPSMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VEXPANDPSMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VEXPANDPSMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
@ -357,6 +363,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPERMWMasked128", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPERMWMasked128", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPERMWMasked256", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPERMWMasked256", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMWMasked512", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPERMWMasked512", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPEXPANDBMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPEXPANDBMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPEXPANDBMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPEXPANDDMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPEXPANDDMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPEXPANDDMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPEXPANDQMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPEXPANDQMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPEXPANDQMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPEXPANDWMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPEXPANDWMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPEXPANDWMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPHADDD128", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHADDD128", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHADDD256", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHADDD256", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHADDSW128", argLength: 2, reg: v21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHADDSW128", argLength: 2, reg: v21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false},

View file

@ -364,6 +364,36 @@ func simdGenericOps() []opData {
{name: "EqualUint64x2", argLength: 2, commutative: true}, {name: "EqualUint64x2", argLength: 2, commutative: true},
{name: "EqualUint64x4", argLength: 2, commutative: true}, {name: "EqualUint64x4", argLength: 2, commutative: true},
{name: "EqualUint64x8", argLength: 2, commutative: true}, {name: "EqualUint64x8", argLength: 2, commutative: true},
{name: "ExpandFloat32x4", argLength: 2, commutative: false},
{name: "ExpandFloat32x8", argLength: 2, commutative: false},
{name: "ExpandFloat32x16", argLength: 2, commutative: false},
{name: "ExpandFloat64x2", argLength: 2, commutative: false},
{name: "ExpandFloat64x4", argLength: 2, commutative: false},
{name: "ExpandFloat64x8", argLength: 2, commutative: false},
{name: "ExpandInt8x16", argLength: 2, commutative: false},
{name: "ExpandInt8x32", argLength: 2, commutative: false},
{name: "ExpandInt8x64", argLength: 2, commutative: false},
{name: "ExpandInt16x8", argLength: 2, commutative: false},
{name: "ExpandInt16x16", argLength: 2, commutative: false},
{name: "ExpandInt16x32", argLength: 2, commutative: false},
{name: "ExpandInt32x4", argLength: 2, commutative: false},
{name: "ExpandInt32x8", argLength: 2, commutative: false},
{name: "ExpandInt32x16", argLength: 2, commutative: false},
{name: "ExpandInt64x2", argLength: 2, commutative: false},
{name: "ExpandInt64x4", argLength: 2, commutative: false},
{name: "ExpandInt64x8", argLength: 2, commutative: false},
{name: "ExpandUint8x16", argLength: 2, commutative: false},
{name: "ExpandUint8x32", argLength: 2, commutative: false},
{name: "ExpandUint8x64", argLength: 2, commutative: false},
{name: "ExpandUint16x8", argLength: 2, commutative: false},
{name: "ExpandUint16x16", argLength: 2, commutative: false},
{name: "ExpandUint16x32", argLength: 2, commutative: false},
{name: "ExpandUint32x4", argLength: 2, commutative: false},
{name: "ExpandUint32x8", argLength: 2, commutative: false},
{name: "ExpandUint32x16", argLength: 2, commutative: false},
{name: "ExpandUint64x2", argLength: 2, commutative: false},
{name: "ExpandUint64x4", argLength: 2, commutative: false},
{name: "ExpandUint64x8", argLength: 2, commutative: false},
{name: "FloorFloat32x4", argLength: 1, commutative: false}, {name: "FloorFloat32x4", argLength: 1, commutative: false},
{name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "FloorFloat32x8", argLength: 1, commutative: false},
{name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "FloorFloat64x2", argLength: 1, commutative: false},

View file

@ -1268,6 +1268,12 @@ const (
OpAMD64VDIVPSMasked128 OpAMD64VDIVPSMasked128
OpAMD64VDIVPSMasked256 OpAMD64VDIVPSMasked256
OpAMD64VDIVPSMasked512 OpAMD64VDIVPSMasked512
OpAMD64VEXPANDPDMasked128
OpAMD64VEXPANDPDMasked256
OpAMD64VEXPANDPDMasked512
OpAMD64VEXPANDPSMasked128
OpAMD64VEXPANDPSMasked256
OpAMD64VEXPANDPSMasked512
OpAMD64VFMADD213PD128 OpAMD64VFMADD213PD128
OpAMD64VFMADD213PD256 OpAMD64VFMADD213PD256
OpAMD64VFMADD213PD512 OpAMD64VFMADD213PD512
@ -1576,6 +1582,18 @@ const (
OpAMD64VPERMWMasked128 OpAMD64VPERMWMasked128
OpAMD64VPERMWMasked256 OpAMD64VPERMWMasked256
OpAMD64VPERMWMasked512 OpAMD64VPERMWMasked512
OpAMD64VPEXPANDBMasked128
OpAMD64VPEXPANDBMasked256
OpAMD64VPEXPANDBMasked512
OpAMD64VPEXPANDDMasked128
OpAMD64VPEXPANDDMasked256
OpAMD64VPEXPANDDMasked512
OpAMD64VPEXPANDQMasked128
OpAMD64VPEXPANDQMasked256
OpAMD64VPEXPANDQMasked512
OpAMD64VPEXPANDWMasked128
OpAMD64VPEXPANDWMasked256
OpAMD64VPEXPANDWMasked512
OpAMD64VPHADDD128 OpAMD64VPHADDD128
OpAMD64VPHADDD256 OpAMD64VPHADDD256
OpAMD64VPHADDSW128 OpAMD64VPHADDSW128
@ -4925,6 +4943,36 @@ const (
OpEqualUint64x2 OpEqualUint64x2
OpEqualUint64x4 OpEqualUint64x4
OpEqualUint64x8 OpEqualUint64x8
OpExpandFloat32x4
OpExpandFloat32x8
OpExpandFloat32x16
OpExpandFloat64x2
OpExpandFloat64x4
OpExpandFloat64x8
OpExpandInt8x16
OpExpandInt8x32
OpExpandInt8x64
OpExpandInt16x8
OpExpandInt16x16
OpExpandInt16x32
OpExpandInt32x4
OpExpandInt32x8
OpExpandInt32x16
OpExpandInt64x2
OpExpandInt64x4
OpExpandInt64x8
OpExpandUint8x16
OpExpandUint8x32
OpExpandUint8x64
OpExpandUint16x8
OpExpandUint16x16
OpExpandUint16x32
OpExpandUint32x4
OpExpandUint32x8
OpExpandUint32x16
OpExpandUint64x2
OpExpandUint64x4
OpExpandUint64x8
OpFloorFloat32x4 OpFloorFloat32x4
OpFloorFloat32x8 OpFloorFloat32x8
OpFloorFloat64x2 OpFloorFloat64x2
@ -20065,6 +20113,90 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VEXPANDPDMasked128",
argLen: 2,
asm: x86.AVEXPANDPD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VEXPANDPDMasked256",
argLen: 2,
asm: x86.AVEXPANDPD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VEXPANDPDMasked512",
argLen: 2,
asm: x86.AVEXPANDPD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VEXPANDPSMasked128",
argLen: 2,
asm: x86.AVEXPANDPS,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VEXPANDPSMasked256",
argLen: 2,
asm: x86.AVEXPANDPS,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VEXPANDPSMasked512",
argLen: 2,
asm: x86.AVEXPANDPS,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VFMADD213PD128", name: "VFMADD213PD128",
argLen: 3, argLen: 3,
@ -24788,6 +24920,174 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPEXPANDBMasked128",
argLen: 2,
asm: x86.AVPEXPANDB,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDBMasked256",
argLen: 2,
asm: x86.AVPEXPANDB,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDBMasked512",
argLen: 2,
asm: x86.AVPEXPANDB,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDDMasked128",
argLen: 2,
asm: x86.AVPEXPANDD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDDMasked256",
argLen: 2,
asm: x86.AVPEXPANDD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDDMasked512",
argLen: 2,
asm: x86.AVPEXPANDD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDQMasked128",
argLen: 2,
asm: x86.AVPEXPANDQ,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDQMasked256",
argLen: 2,
asm: x86.AVPEXPANDQ,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDQMasked512",
argLen: 2,
asm: x86.AVPEXPANDQ,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDWMasked128",
argLen: 2,
asm: x86.AVPEXPANDW,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDWMasked256",
argLen: 2,
asm: x86.AVPEXPANDW,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPEXPANDWMasked512",
argLen: 2,
asm: x86.AVPEXPANDW,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPHADDD128", name: "VPHADDD128",
argLen: 2, argLen: 2,
@ -64829,6 +65129,156 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "ExpandFloat32x4",
argLen: 2,
generic: true,
},
{
name: "ExpandFloat32x8",
argLen: 2,
generic: true,
},
{
name: "ExpandFloat32x16",
argLen: 2,
generic: true,
},
{
name: "ExpandFloat64x2",
argLen: 2,
generic: true,
},
{
name: "ExpandFloat64x4",
argLen: 2,
generic: true,
},
{
name: "ExpandFloat64x8",
argLen: 2,
generic: true,
},
{
name: "ExpandInt8x16",
argLen: 2,
generic: true,
},
{
name: "ExpandInt8x32",
argLen: 2,
generic: true,
},
{
name: "ExpandInt8x64",
argLen: 2,
generic: true,
},
{
name: "ExpandInt16x8",
argLen: 2,
generic: true,
},
{
name: "ExpandInt16x16",
argLen: 2,
generic: true,
},
{
name: "ExpandInt16x32",
argLen: 2,
generic: true,
},
{
name: "ExpandInt32x4",
argLen: 2,
generic: true,
},
{
name: "ExpandInt32x8",
argLen: 2,
generic: true,
},
{
name: "ExpandInt32x16",
argLen: 2,
generic: true,
},
{
name: "ExpandInt64x2",
argLen: 2,
generic: true,
},
{
name: "ExpandInt64x4",
argLen: 2,
generic: true,
},
{
name: "ExpandInt64x8",
argLen: 2,
generic: true,
},
{
name: "ExpandUint8x16",
argLen: 2,
generic: true,
},
{
name: "ExpandUint8x32",
argLen: 2,
generic: true,
},
{
name: "ExpandUint8x64",
argLen: 2,
generic: true,
},
{
name: "ExpandUint16x8",
argLen: 2,
generic: true,
},
{
name: "ExpandUint16x16",
argLen: 2,
generic: true,
},
{
name: "ExpandUint16x32",
argLen: 2,
generic: true,
},
{
name: "ExpandUint32x4",
argLen: 2,
generic: true,
},
{
name: "ExpandUint32x8",
argLen: 2,
generic: true,
},
{
name: "ExpandUint32x16",
argLen: 2,
generic: true,
},
{
name: "ExpandUint64x2",
argLen: 2,
generic: true,
},
{
name: "ExpandUint64x4",
argLen: 2,
generic: true,
},
{
name: "ExpandUint64x8",
argLen: 2,
generic: true,
},
{ {
name: "FloorFloat32x4", name: "FloorFloat32x4",
argLen: 1, argLen: 1,

View file

@ -1754,6 +1754,66 @@ func rewriteValueAMD64(v *Value) bool {
return true return true
case OpEqualUint8x64: case OpEqualUint8x64:
return rewriteValueAMD64_OpEqualUint8x64(v) return rewriteValueAMD64_OpEqualUint8x64(v)
case OpExpandFloat32x16:
return rewriteValueAMD64_OpExpandFloat32x16(v)
case OpExpandFloat32x4:
return rewriteValueAMD64_OpExpandFloat32x4(v)
case OpExpandFloat32x8:
return rewriteValueAMD64_OpExpandFloat32x8(v)
case OpExpandFloat64x2:
return rewriteValueAMD64_OpExpandFloat64x2(v)
case OpExpandFloat64x4:
return rewriteValueAMD64_OpExpandFloat64x4(v)
case OpExpandFloat64x8:
return rewriteValueAMD64_OpExpandFloat64x8(v)
case OpExpandInt16x16:
return rewriteValueAMD64_OpExpandInt16x16(v)
case OpExpandInt16x32:
return rewriteValueAMD64_OpExpandInt16x32(v)
case OpExpandInt16x8:
return rewriteValueAMD64_OpExpandInt16x8(v)
case OpExpandInt32x16:
return rewriteValueAMD64_OpExpandInt32x16(v)
case OpExpandInt32x4:
return rewriteValueAMD64_OpExpandInt32x4(v)
case OpExpandInt32x8:
return rewriteValueAMD64_OpExpandInt32x8(v)
case OpExpandInt64x2:
return rewriteValueAMD64_OpExpandInt64x2(v)
case OpExpandInt64x4:
return rewriteValueAMD64_OpExpandInt64x4(v)
case OpExpandInt64x8:
return rewriteValueAMD64_OpExpandInt64x8(v)
case OpExpandInt8x16:
return rewriteValueAMD64_OpExpandInt8x16(v)
case OpExpandInt8x32:
return rewriteValueAMD64_OpExpandInt8x32(v)
case OpExpandInt8x64:
return rewriteValueAMD64_OpExpandInt8x64(v)
case OpExpandUint16x16:
return rewriteValueAMD64_OpExpandUint16x16(v)
case OpExpandUint16x32:
return rewriteValueAMD64_OpExpandUint16x32(v)
case OpExpandUint16x8:
return rewriteValueAMD64_OpExpandUint16x8(v)
case OpExpandUint32x16:
return rewriteValueAMD64_OpExpandUint32x16(v)
case OpExpandUint32x4:
return rewriteValueAMD64_OpExpandUint32x4(v)
case OpExpandUint32x8:
return rewriteValueAMD64_OpExpandUint32x8(v)
case OpExpandUint64x2:
return rewriteValueAMD64_OpExpandUint64x2(v)
case OpExpandUint64x4:
return rewriteValueAMD64_OpExpandUint64x4(v)
case OpExpandUint64x8:
return rewriteValueAMD64_OpExpandUint64x8(v)
case OpExpandUint8x16:
return rewriteValueAMD64_OpExpandUint8x16(v)
case OpExpandUint8x32:
return rewriteValueAMD64_OpExpandUint8x32(v)
case OpExpandUint8x64:
return rewriteValueAMD64_OpExpandUint8x64(v)
case OpFMA: case OpFMA:
return rewriteValueAMD64_OpFMA(v) return rewriteValueAMD64_OpFMA(v)
case OpFloor: case OpFloor:
@ -34479,6 +34539,486 @@ func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
return true return true
} }
} }
// rewriteValueAMD64_OpExpandFloat32x16 lowers the generic ExpandFloat32x16 op to the
// AVX-512 masked expand op VEXPANDPSMasked512, first converting the vector mask
// argument to a K-register mask via VPMOVVec32x16ToM. Generated-style rulegen code:
// the single rule is unconditional, so this always rewrites and returns true.
func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandFloat32x16 x mask)
	// result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VEXPANDPSMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandFloat32x4 lowers ExpandFloat32x4 to VEXPANDPSMasked128,
// converting the vector mask to a K-register mask via VPMOVVec32x4ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandFloat32x4 x mask)
	// result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VEXPANDPSMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandFloat32x8 lowers ExpandFloat32x8 to VEXPANDPSMasked256,
// converting the vector mask to a K-register mask via VPMOVVec32x8ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandFloat32x8 x mask)
	// result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VEXPANDPSMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandFloat64x2 lowers ExpandFloat64x2 to VEXPANDPDMasked128,
// converting the vector mask to a K-register mask via VPMOVVec64x2ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandFloat64x2 x mask)
	// result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VEXPANDPDMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandFloat64x4 lowers ExpandFloat64x4 to VEXPANDPDMasked256,
// converting the vector mask to a K-register mask via VPMOVVec64x4ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandFloat64x4 x mask)
	// result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VEXPANDPDMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandFloat64x8 lowers ExpandFloat64x8 to VEXPANDPDMasked512,
// converting the vector mask to a K-register mask via VPMOVVec64x8ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandFloat64x8 x mask)
	// result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VEXPANDPDMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt16x16 lowers ExpandInt16x16 to VPEXPANDWMasked256,
// converting the vector mask to a K-register mask via VPMOVVec16x16ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt16x16 x mask)
	// result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDWMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt16x32 lowers ExpandInt16x32 to VPEXPANDWMasked512,
// converting the vector mask to a K-register mask via VPMOVVec16x32ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt16x32 x mask)
	// result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDWMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt16x8 lowers ExpandInt16x8 to VPEXPANDWMasked128,
// converting the vector mask to a K-register mask via VPMOVVec16x8ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt16x8 x mask)
	// result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDWMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt32x16 lowers ExpandInt32x16 to VPEXPANDDMasked512,
// converting the vector mask to a K-register mask via VPMOVVec32x16ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt32x16 x mask)
	// result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDDMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt32x4 lowers ExpandInt32x4 to VPEXPANDDMasked128,
// converting the vector mask to a K-register mask via VPMOVVec32x4ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt32x4 x mask)
	// result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDDMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt32x8 lowers ExpandInt32x8 to VPEXPANDDMasked256,
// converting the vector mask to a K-register mask via VPMOVVec32x8ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt32x8 x mask)
	// result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDDMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt64x2 lowers ExpandInt64x2 to VPEXPANDQMasked128,
// converting the vector mask to a K-register mask via VPMOVVec64x2ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt64x2 x mask)
	// result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDQMasked128)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt64x4 lowers ExpandInt64x4 to VPEXPANDQMasked256,
// converting the vector mask to a K-register mask via VPMOVVec64x4ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt64x4 x mask)
	// result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDQMasked256)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
// rewriteValueAMD64_OpExpandInt64x8 lowers ExpandInt64x8 to VPEXPANDQMasked512,
// converting the vector mask to a K-register mask via VPMOVVec64x8ToM.
// The rule is unconditional; it always rewrites and returns true.
func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	// match: (ExpandInt64x8 x mask)
	// result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
	for {
		x := v_0
		mask := v_1
		v.reset(OpAMD64VPEXPANDQMasked512)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
		v0.AddArg(mask)
		v.AddArg2(x, v0)
		return true
	}
}
func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
	// match: (ExpandInt8x16 x mask)
	// result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDBMasked128)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
	// match: (ExpandInt8x32 x mask)
	// result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDBMasked256)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
	// match: (ExpandInt8x64 x mask)
	// result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDBMasked512)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
	// match: (ExpandUint16x16 x mask)
	// result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDWMasked256)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
	// match: (ExpandUint16x32 x mask)
	// result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDWMasked512)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
	// match: (ExpandUint16x8 x mask)
	// result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDWMasked128)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
	// match: (ExpandUint32x16 x mask)
	// result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDDMasked512)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
	// match: (ExpandUint32x4 x mask)
	// result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDDMasked128)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
	// match: (ExpandUint32x8 x mask)
	// result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDDMasked256)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
	// match: (ExpandUint64x2 x mask)
	// result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDQMasked128)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
	// match: (ExpandUint64x4 x mask)
	// result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDQMasked256)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
	// match: (ExpandUint64x8 x mask)
	// result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDQMasked512)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
	// match: (ExpandUint8x16 x mask)
	// result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDBMasked128)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
	// match: (ExpandUint8x32 x mask)
	// result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDBMasked256)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
	// match: (ExpandUint8x64 x mask)
	// result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
	// Lower to the AVX-512 masked expand, converting the vector mask to a K-mask first.
	x, mask := v.Args[0], v.Args[1]
	toM := v.Block.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
	toM.AddArg(mask)
	v.reset(OpAMD64VPEXPANDBMasked512)
	v.AddArg2(x, toM)
	return true
}
func rewriteValueAMD64_OpFMA(v *Value) bool {
	v_2 := v.Args[2]
	v_1 := v.Args[1]

View file

@ -396,6 +396,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Expand", opLen2(ssa.OpExpandFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Expand", opLen2(ssa.OpExpandFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Expand", opLen2(ssa.OpExpandFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Expand", opLen2(ssa.OpExpandInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Expand", opLen2(ssa.OpExpandInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Expand", opLen2(ssa.OpExpandInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Expand", opLen2(ssa.OpExpandInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Expand", opLen2(ssa.OpExpandInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.Expand", opLen2(ssa.OpExpandInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Expand", opLen2(ssa.OpExpandInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Expand", opLen2(ssa.OpExpandInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.Expand", opLen2(ssa.OpExpandInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Expand", opLen2(ssa.OpExpandInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Expand", opLen2(ssa.OpExpandInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.Expand", opLen2(ssa.OpExpandInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Expand", opLen2(ssa.OpExpandUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Expand", opLen2(ssa.OpExpandUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Expand", opLen2(ssa.OpExpandUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Expand", opLen2(ssa.OpExpandUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Expand", opLen2(ssa.OpExpandUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Expand", opLen2(ssa.OpExpandUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Expand", opLen2(ssa.OpExpandUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Expand", opLen2(ssa.OpExpandUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.Expand", opLen2(ssa.OpExpandUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)

View file

@ -2399,6 +2399,188 @@ func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
// Asm: VPCMPUQ, CPU Feature: AVX512F // Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
/* Expand */
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPS, CPU Feature: AVX512F
func (x Float32x4) Expand(mask Mask32x4) Float32x4
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPS, CPU Feature: AVX512F
func (x Float32x8) Expand(mask Mask32x8) Float32x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPS, CPU Feature: AVX512F
func (x Float32x16) Expand(mask Mask32x16) Float32x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPD, CPU Feature: AVX512F
func (x Float64x2) Expand(mask Mask64x2) Float64x2
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPD, CPU Feature: AVX512F
func (x Float64x4) Expand(mask Mask64x4) Float64x4
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VEXPANDPD, CPU Feature: AVX512F
func (x Float64x8) Expand(mask Mask64x8) Float64x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Int8x16) Expand(mask Mask8x16) Int8x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Int8x32) Expand(mask Mask8x32) Int8x32
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Int8x64) Expand(mask Mask8x64) Int8x64
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Int16x8) Expand(mask Mask16x8) Int16x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Int16x16) Expand(mask Mask16x16) Int16x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Int16x32) Expand(mask Mask16x32) Int16x32
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512F
func (x Int32x4) Expand(mask Mask32x4) Int32x4
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512F
func (x Int32x8) Expand(mask Mask32x8) Int32x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512F
func (x Int32x16) Expand(mask Mask32x16) Int32x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512F
func (x Int64x2) Expand(mask Mask64x2) Int64x2
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512F
func (x Int64x4) Expand(mask Mask64x4) Int64x4
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512F
func (x Int64x8) Expand(mask Mask64x8) Int64x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Uint8x16) Expand(mask Mask8x16) Uint8x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Uint8x32) Expand(mask Mask8x32) Uint8x32
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
func (x Uint8x64) Expand(mask Mask8x64) Uint8x64
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) Expand(mask Mask16x8) Uint16x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) Expand(mask Mask16x16) Uint16x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) Expand(mask Mask16x32) Uint16x32
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512F
func (x Uint32x4) Expand(mask Mask32x4) Uint32x4
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512F
func (x Uint32x8) Expand(mask Mask32x8) Uint32x8
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDD, CPU Feature: AVX512F
func (x Uint32x16) Expand(mask Mask32x16) Uint32x16
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512F
func (x Uint64x2) Expand(mask Mask64x2) Uint64x2
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512F
func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
// Expand performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
//
// Asm: VPEXPANDQ, CPU Feature: AVX512F
func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
/* Floor */
// Floor rounds elements down to the nearest integer.

View file

@ -187,6 +187,22 @@ func TestCompress(t *testing.T) {
} }
} }
// TestExpand checks Int32x4.Expand: elements packed at the low end of the
// source are scattered to the mask's set lanes, in order; unset lanes get 0.
func TestExpand(t *testing.T) {
	if !simd.HasAVX512() {
		// t.Skip stops the test via runtime.Goexit, so no return is needed.
		t.Skip("Test requires HasAVX512, not available on this hardware")
	}
	v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
	// Mask lanes 1 and 3 are set (-1); lanes 0 and 2 are clear.
	v0101 := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1})
	v2400 := v3400.Expand(v0101.AsMask32x4())
	got := make([]int32, 4)
	v2400.StoreSlice(got)
	// 3 goes to the first set lane (1), 4 to the next set lane (3).
	want := []int32{0, 3, 0, 4}
	if !slices.Equal(got, want) {
		t.Errorf("want and got differ, want=%v, got=%v", want, got)
	}
}
func TestPairDotProdAccumulate(t *testing.T) {
	if !simd.HasAVX512GFNI() {
		// TODO: this function is actually VNNI, let's implement and call the right check.