[dev.simd] cmd/compile, simd: support store to bits for mask
This CL is partially generated by CL 689775.

Change-Id: I0c36fd2a44706c88db1a1d5ea4a6d0b9f891d85f
Reviewed-on: https://go-review.googlesource.com/c/go/+/689795
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Parent: 41054cdb1c
Commit: 6f7a1164e7

15 changed files with 1192 additions and 523 deletions
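The user-visible effect of this change is that a SIMD mask can be written to memory as a bitmap (one bit per lane) instead of only round-tripping through a vector. A minimal usage sketch, assuming a hypothetical StoreToBits-style method on the experimental simd package's mask types — the method name and signature here are illustrative assumptions, not the package's confirmed API:

	package main

	import "simd" // experimental package on the dev.simd branch (GOEXPERIMENT=simd)

	func compareAndStore(a, b simd.Int64x8, out *uint64) {
		m := a.Greater(b)  // Mask64x8: one lane per int64 element (Greater assumed to exist)
		m.StoreToBits(out) // hypothetical: persist the 8 lane bits to memory
	}

Per the rewrite rules below, such a store should compile to a vector-to-mask move followed by a single KMOVQ to memory.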
@@ -24,8 +24,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPABSQ128,
 		ssa.OpAMD64VPABSQ256,
 		ssa.OpAMD64VPABSQ512,
-		ssa.OpAMD64VRCP14PS128,
-		ssa.OpAMD64VRCP14PS256,
+		ssa.OpAMD64VRCPPS128,
+		ssa.OpAMD64VRCPPS256,
 		ssa.OpAMD64VRCP14PS512,
 		ssa.OpAMD64VRCP14PD128,
 		ssa.OpAMD64VRCP14PD256,
@@ -335,6 +335,16 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPXORQ512:
 		p = simdV21(s, v)

+	case ssa.OpAMD64VPCMPEQB512,
+		ssa.OpAMD64VPCMPEQW512,
+		ssa.OpAMD64VPCMPEQD512,
+		ssa.OpAMD64VPCMPEQQ512,
+		ssa.OpAMD64VPCMPGTB512,
+		ssa.OpAMD64VPCMPGTW512,
+		ssa.OpAMD64VPCMPGTD512,
+		ssa.OpAMD64VPCMPGTQ512:
+		p = simdV2k(s, v)
+
 	case ssa.OpAMD64VADDPSMasked128,
 		ssa.OpAMD64VADDPSMasked256,
 		ssa.OpAMD64VADDPSMasked512,
@@ -733,30 +743,30 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 	case ssa.OpAMD64VCMPPS512,
 		ssa.OpAMD64VCMPPD512,
-		ssa.OpAMD64VPCMPB512,
-		ssa.OpAMD64VPCMPW512,
-		ssa.OpAMD64VPCMPD512,
-		ssa.OpAMD64VPCMPQ512,
-		ssa.OpAMD64VPCMPUB512,
-		ssa.OpAMD64VPCMPUW512,
-		ssa.OpAMD64VPCMPUD512,
-		ssa.OpAMD64VPCMPUQ512,
 		ssa.OpAMD64VPCMPUB128,
 		ssa.OpAMD64VPCMPUB256,
+		ssa.OpAMD64VPCMPUB512,
 		ssa.OpAMD64VPCMPUW128,
 		ssa.OpAMD64VPCMPUW256,
+		ssa.OpAMD64VPCMPUW512,
 		ssa.OpAMD64VPCMPUD128,
 		ssa.OpAMD64VPCMPUD256,
+		ssa.OpAMD64VPCMPUD512,
 		ssa.OpAMD64VPCMPUQ128,
 		ssa.OpAMD64VPCMPUQ256,
+		ssa.OpAMD64VPCMPUQ512,
 		ssa.OpAMD64VPCMPB128,
 		ssa.OpAMD64VPCMPB256,
+		ssa.OpAMD64VPCMPB512,
 		ssa.OpAMD64VPCMPW128,
 		ssa.OpAMD64VPCMPW256,
+		ssa.OpAMD64VPCMPW512,
 		ssa.OpAMD64VPCMPD128,
 		ssa.OpAMD64VPCMPD256,
+		ssa.OpAMD64VPCMPD512,
 		ssa.OpAMD64VPCMPQ128,
-		ssa.OpAMD64VPCMPQ256:
+		ssa.OpAMD64VPCMPQ256,
+		ssa.OpAMD64VPCMPQ512:
 		p = simdV2kImm8(s, v)

 	case ssa.OpAMD64VCMPPSMasked128,
@@ -1468,10 +1468,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssagen.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = simdOrMaskReg(v)
-	case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512:
+	case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512, ssa.OpAMD64KMOVQstore:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
-		p.From.Reg = simdReg(v.Args[1])
+		p.From.Reg = simdOrMaskReg(v.Args[1])
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = v.Args[0].Reg()
 		ssagen.AddAux(&p.To, v)
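The store case now routes both vector and mask sources through simdOrMaskReg, mirroring the load side above it. A plausible sketch of such a helper, assuming mask-typed SSA values are the only SIMD values at most 8 bytes wide (the branch's actual implementation may differ, and maskReg/simdReg are the surrounding package's existing register pickers):

	// Sketch: choose a K register for mask-typed values and an X/Y/Z
	// register otherwise, so one store case serves VMOVDQU and KMOVQ alike.
	func simdOrMaskReg(v *ssa.Value) int16 {
		if v.Type.Size() <= 8 { // assumption: only mask values are this narrow
			return maskReg(v)
		}
		return simdReg(v)
	}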
@@ -1698,6 +1698,22 @@
 (LoadMask64x4 <t> ptr mem) => (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
 (LoadMask64x8 <t> ptr mem) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))

+(StoreMask8x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
+(StoreMask8x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
+(StoreMask8x64 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
+
+(StoreMask16x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
+(StoreMask16x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
+(StoreMask16x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
+
+(StoreMask32x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
+(StoreMask32x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
+(StoreMask32x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
+
+(StoreMask64x2 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
+(StoreMask64x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
+(StoreMask64x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
+
 (Load <t> ptr mem) && t.Size() == 16 => (VMOVDQUload128 ptr mem)

 (Store {t} ptr val mem) && t.Size() == 16 => (VMOVDQUstore128 ptr val mem)
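Each StoreMask rule composes an existing vector-to-mask move with the new KMOVQstore op, so storing, say, a Mask64x8 should lower to roughly this two-instruction sequence (Go assembler syntax; the register choices are illustrative, and the VPMOVQ2M spelling assumes the usual AVX-512 encoding of VPMOVVec64x8ToM):

	VPMOVQ2M Z1, K1    // VPMOVVec64x8ToM: collapse lane sign bits into a K register
	KMOVQ    K1, (AX)  // KMOVQstore: write the mask bits to memory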
@@ -234,7 +234,8 @@ func init() {
 		wfpw    = regInfo{inputs: []regMask{w, fp}, outputs: wonly}
 		wfpkw   = regInfo{inputs: []regMask{w, fp, mask}, outputs: wonly}

 		kload   = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
+		kstore  = regInfo{inputs: []regMask{gpspsb, mask, 0}}

 		prefreg = regInfo{inputs: []regMask{gpspsbg}}
 	)
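Reading the new kstore entry by argument position (a sketch of the regMask conventions used in this file; the comments are interpretive):

	kstore = regInfo{inputs: []regMask{
		gpspsb, // arg0: store address — a general-purpose register, SP, or SB
		mask,   // arg1: the K register holding the mask bits
		0,      // arg2: the memory operand, which consumes no register
	}}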
@@ -1318,6 +1319,7 @@
 		{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},

 		{name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
+		{name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"},
 	}

 	var AMD64blocks = []blockData{
@@ -678,6 +678,19 @@ var genericOps = []opData{
 	{name: "LoadMask64x2", argLength: 2}, // arg0 = ptr, arg1 = mem
 	{name: "LoadMask64x4", argLength: 2}, // arg0 = ptr, arg1 = mem
 	{name: "LoadMask64x8", argLength: 2}, // arg0 = ptr, arg1 = mem
+
+	{name: "StoreMask8x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask8x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask8x64", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask16x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask16x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask16x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask32x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask32x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask32x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask64x2", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask64x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "StoreMask64x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
 }

 // kind controls successors implicit exit
@@ -152,8 +152,8 @@
 (AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ApproximateReciprocalFloat32x4 ...) => (VRCP14PS128 ...)
-(ApproximateReciprocalFloat32x8 ...) => (VRCP14PS256 ...)
+(ApproximateReciprocalFloat32x4 ...) => (VRCPPS128 ...)
+(ApproximateReciprocalFloat32x8 ...) => (VRCPPS256 ...)
 (ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
 (ApproximateReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
 (ApproximateReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
@@ -305,28 +305,28 @@
 (EqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
 (EqualInt8x16 ...) => (VPCMPEQB128 ...)
 (EqualInt8x32 ...) => (VPCMPEQB256 ...)
-(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [0] x y))
+(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPEQB512 x y))
 (EqualInt16x8 ...) => (VPCMPEQW128 ...)
 (EqualInt16x16 ...) => (VPCMPEQW256 ...)
-(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [0] x y))
+(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))
 (EqualInt32x4 ...) => (VPCMPEQD128 ...)
 (EqualInt32x8 ...) => (VPCMPEQD256 ...)
-(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [0] x y))
+(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))
 (EqualInt64x2 ...) => (VPCMPEQQ128 ...)
 (EqualInt64x4 ...) => (VPCMPEQQ256 ...)
-(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
+(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
 (EqualUint8x16 ...) => (VPCMPEQB128 ...)
 (EqualUint8x32 ...) => (VPCMPEQB256 ...)
-(EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
+(EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPEQB512 x y))
 (EqualUint16x8 ...) => (VPCMPEQW128 ...)
 (EqualUint16x16 ...) => (VPCMPEQW256 ...)
-(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
+(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))
 (EqualUint32x4 ...) => (VPCMPEQD128 ...)
 (EqualUint32x8 ...) => (VPCMPEQD256 ...)
-(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y))
+(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))
 (EqualUint64x2 ...) => (VPCMPEQQ128 ...)
 (EqualUint64x4 ...) => (VPCMPEQQ256 ...)
-(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y))
+(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
 (EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
 (EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
 (EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
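The 512-bit equality rules now target the dedicated VPCMPEQ* forms instead of the generic VPCMP* with an immediate predicate (immediate 0 selects equality). The two instruction shapes, sketched in Go assembler syntax with illustrative registers:

	VPCMPQ   $0, Z2, Z1, K1 // before: generic compare, predicate 0 (EQ) in the imm8
	VPCMPEQQ Z2, Z1, K1     // after: dedicated equality compare, no immediate

Since equality is sign-agnostic, the unsigned EqualUint*x64 rules can reuse the same signed-named VPCMPEQ* ops.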
@@ -453,16 +453,16 @@
 (GreaterFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [14] x y))
 (GreaterInt8x16 ...) => (VPCMPGTB128 ...)
 (GreaterInt8x32 ...) => (VPCMPGTB256 ...)
-(GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [14] x y))
+(GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPGTB512 x y))
 (GreaterInt16x8 ...) => (VPCMPGTW128 ...)
 (GreaterInt16x16 ...) => (VPCMPGTW256 ...)
-(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [14] x y))
+(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPGTW512 x y))
 (GreaterInt32x4 ...) => (VPCMPGTD128 ...)
 (GreaterInt32x8 ...) => (VPCMPGTD256 ...)
-(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [14] x y))
+(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPGTD512 x y))
 (GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
 (GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
-(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [14] x y))
+(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
 (GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y))
 (GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [14] x y))
 (GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
@@ -33,7 +33,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VADDPS128", argLength: 2, reg: v21, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VADDPSMasked128", argLength: 3, reg: w2kw, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VRCP14PS128", argLength: 1, reg: w11, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VRCPPS128", argLength: 1, reg: v11, asm: "VRCPPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VRCP14PSMasked128", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VRSQRTPS128", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VRSQRT14PSMasked128", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -63,7 +63,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VADDPS256", argLength: 2, reg: v21, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VADDPSMasked256", argLength: 3, reg: w2kw, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VRCP14PS256", argLength: 1, reg: w11, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
+	{name: "VRCPPS256", argLength: 1, reg: v11, asm: "VRCPPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VRCP14PSMasked256", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VRSQRTPS256", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VRSQRT14PSMasked256", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -224,6 +224,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDWMasked512", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VPCMPEQW512", argLength: 2, reg: w2k, asm: "VPCMPEQW", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPGTW512", argLength: 2, reg: w2k, asm: "VPCMPGTW", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSW512", argLength: 2, reg: w21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMAXSWMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSW512", argLength: 2, reg: w21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -305,6 +307,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPANDND512", argLength: 2, reg: w21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDNDMasked512", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPCOMPRESSDMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VPCMPEQD512", argLength: 2, reg: w2k, asm: "VPCMPEQD", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPGTD512", argLength: 2, reg: w2k, asm: "VPCMPGTD", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSD512", argLength: 2, reg: w21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMAXSDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSD512", argLength: 2, reg: w21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -526,6 +530,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPANDNQ512", argLength: 2, reg: w21, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDNQMasked512", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPCOMPRESSQMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VPCMPEQQ512", argLength: 2, reg: w2k, asm: "VPCMPEQQ", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPGTQ512", argLength: 2, reg: w2k, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSQ512", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMAXSQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSQ512", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -611,6 +617,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPADDB512", argLength: 2, reg: w21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDBMasked512", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPCOMPRESSBMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VPCMPEQB512", argLength: 2, reg: w2k, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPGTB512", argLength: 2, reg: w2k, asm: "VPCMPGTB", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSB512", argLength: 2, reg: w21, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMAXSBMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSB512", argLength: 2, reg: w21, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -692,10 +700,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMINUD128", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINUDMasked128", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPERMI2D128", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPERMI2PS128", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPERMI2PSMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPERMI2D128", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPERMI2DMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPERMI2PSMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSRLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -705,12 +713,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
+	{name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPERMI2PS256", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
+	{name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
+	{name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPERMPSMasked256", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPERMDMasked256", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -735,10 +743,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMINUQ256", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINUQMasked256", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPERMPD256", argLength: 2, reg: w21, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
+	{name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
+	{name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPERMI2PDMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPERMI2QMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPERMQMasked256", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -759,8 +767,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPERMI2PD512", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
 	{name: "VPERMI2QMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec512", resultInArg0: true},
 	{name: "VPERMI2PDMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
-	{name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -858,8 +866,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPSHLDWMasked256", argLength: 3, reg: w2kw, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSHRDW256", argLength: 2, reg: w21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSHRDWMasked256", argLength: 3, reg: w2kw, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPSHLDW512", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPSHLDWMasked512", argLength: 3, reg: w2kw, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPSHRDW512", argLength: 2, reg: w21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
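Note that this hunk (and the parallel ones below) also flips the immediate-predicate compares from commutative: true to commutative: false. That flag was wrong for them: the predicate is directional, so swapping the operands changes the result for unequal lanes. A small scalar model of the per-lane behavior (hypothetical values; predicate "signed less-than"):

	package main

	import "fmt"

	// Lane-wise signed less-than, a scalar model of one VPCMP* predicate.
	func lt(a, b [2]int32) (m [2]bool) {
		for i := range a {
			m[i] = a[i] < b[i]
		}
		return
	}

	func main() {
		x, y := [2]int32{1, 5}, [2]int32{2, 2}
		fmt.Println(lt(x, y)) // [true false]
		fmt.Println(lt(y, x)) // [false true] — swapping operands changes the mask
	}

Only symmetric predicates such as equality commute, and those now go through the dedicated VPCMPEQ* ops added above.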
@@ -872,8 +880,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPSHLDWMasked128", argLength: 3, reg: w2kw, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSHRDW128", argLength: 2, reg: w21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSHRDWMasked128", argLength: 3, reg: w2kw, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPROLD512", argLength: 1, reg: w11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPROLDMasked512", argLength: 2, reg: wkw, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPRORD512", argLength: 1, reg: w11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -926,8 +934,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPSHLDQMasked256", argLength: 3, reg: w2kw, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSHRDQ256", argLength: 2, reg: w21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSHRDQMasked256", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPROLQ512", argLength: 1, reg: w11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPROLQMasked512", argLength: 2, reg: wkw, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPRORQ512", argLength: 1, reg: w11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -944,16 +952,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPCMPB256", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
@@ -962,8 +970,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VGF2P8AFFINEQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VGF2P8AFFINEINVQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -976,11 +984,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VGF2P8AFFINEINVQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VGF2P8AFFINEQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VGF2P8AFFINEINVQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VGF2P8AFFINEINVQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
 	}
 }
@@ -912,10 +912,10 @@ func simdGenericOps() []opData {
 	{name: "PermuteUint16x16", argLength: 2, commutative: false},
 	{name: "Permute2Uint16x16", argLength: 3, commutative: false},
 	{name: "Permute2Int16x16", argLength: 3, commutative: false},
-	{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
 	{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
-	{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
+	{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
 	{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
+	{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
 	{name: "PopCountUint16x16", argLength: 1, commutative: false},
 	{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
 	{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
@@ -966,8 +966,8 @@ func simdGenericOps() []opData {
	{name: "Permute2Int16x32", argLength: 3, commutative: false},
	{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
	{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
-	{name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
	{name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
+	{name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
	{name: "PopCountUint16x32", argLength: 1, commutative: false},
	{name: "PopCountMaskedUint16x32", argLength: 2, commutative: false},
	{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
@@ -1018,12 +1018,12 @@ func simdGenericOps() []opData {
	{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
	{name: "PermuteInt16x8", argLength: 2, commutative: false},
	{name: "PermuteUint16x8", argLength: 2, commutative: false},
-	{name: "Permute2Int16x8", argLength: 3, commutative: false},
	{name: "Permute2Uint16x8", argLength: 3, commutative: false},
+	{name: "Permute2Int16x8", argLength: 3, commutative: false},
	{name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
	{name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
-	{name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
	{name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
+	{name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
	{name: "PopCountUint16x8", argLength: 1, commutative: false},
	{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
	{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
@@ -1070,17 +1070,17 @@ func simdGenericOps() []opData {
	{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
	{name: "OrUint32x16", argLength: 2, commutative: true},
	{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
-	{name: "PermuteFloat32x16", argLength: 2, commutative: false},
	{name: "PermuteInt32x16", argLength: 2, commutative: false},
+	{name: "PermuteFloat32x16", argLength: 2, commutative: false},
	{name: "PermuteUint32x16", argLength: 2, commutative: false},
	{name: "Permute2Uint32x16", argLength: 3, commutative: false},
	{name: "Permute2Float32x16", argLength: 3, commutative: false},
	{name: "Permute2Int32x16", argLength: 3, commutative: false},
+	{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
	{name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
	{name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
-	{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
-	{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
	{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
+	{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
	{name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
	{name: "PopCountUint32x16", argLength: 1, commutative: false},
	{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
@@ -1307,15 +1307,15 @@ func simdGenericOps() []opData {
	{name: "PermuteUint64x4", argLength: 2, commutative: false},
	{name: "PermuteInt64x4", argLength: 2, commutative: false},
	{name: "PermuteFloat64x4", argLength: 2, commutative: false},
-	{name: "Permute2Float64x4", argLength: 3, commutative: false},
-	{name: "Permute2Int64x4", argLength: 3, commutative: false},
	{name: "Permute2Uint64x4", argLength: 3, commutative: false},
-	{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
+	{name: "Permute2Int64x4", argLength: 3, commutative: false},
+	{name: "Permute2Float64x4", argLength: 3, commutative: false},
	{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
+	{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
	{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
+	{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
	{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
	{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
-	{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
	{name: "PopCountUint64x4", argLength: 1, commutative: false},
	{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
	{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
@@ -1365,18 +1365,18 @@ func simdGenericOps() []opData {
	{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
	{name: "OrUint64x8", argLength: 2, commutative: true},
	{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
+	{name: "PermuteUint64x8", argLength: 2, commutative: false},
	{name: "PermuteFloat64x8", argLength: 2, commutative: false},
	{name: "PermuteInt64x8", argLength: 2, commutative: false},
-	{name: "PermuteUint64x8", argLength: 2, commutative: false},
-	{name: "Permute2Int64x8", argLength: 3, commutative: false},
	{name: "Permute2Float64x8", argLength: 3, commutative: false},
	{name: "Permute2Uint64x8", argLength: 3, commutative: false},
+	{name: "Permute2Int64x8", argLength: 3, commutative: false},
+	{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
	{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
	{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
-	{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
-	{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
-	{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
	{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
+	{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
+	{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
	{name: "PopCountUint64x8", argLength: 1, commutative: false},
	{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
	{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
[File diff suppressed because it is too large]
@@ -985,10 +985,10 @@ func rewriteValueAMD64(v *Value) bool {
		v.Op = OpAMD64VRCP14PS512
		return true
	case OpApproximateReciprocalFloat32x4:
-		v.Op = OpAMD64VRCP14PS128
+		v.Op = OpAMD64VRCPPS128
		return true
	case OpApproximateReciprocalFloat32x8:
-		v.Op = OpAMD64VRCP14PS256
+		v.Op = OpAMD64VRCPPS256
		return true
	case OpApproximateReciprocalFloat64x2:
		v.Op = OpAMD64VRCP14PD128
@@ -5184,6 +5184,30 @@ func rewriteValueAMD64(v *Value) bool {
		return true
	case OpStore:
		return rewriteValueAMD64_OpStore(v)
+	case OpStoreMask16x16:
+		return rewriteValueAMD64_OpStoreMask16x16(v)
+	case OpStoreMask16x32:
+		return rewriteValueAMD64_OpStoreMask16x32(v)
+	case OpStoreMask16x8:
+		return rewriteValueAMD64_OpStoreMask16x8(v)
+	case OpStoreMask32x16:
+		return rewriteValueAMD64_OpStoreMask32x16(v)
+	case OpStoreMask32x4:
+		return rewriteValueAMD64_OpStoreMask32x4(v)
+	case OpStoreMask32x8:
+		return rewriteValueAMD64_OpStoreMask32x8(v)
+	case OpStoreMask64x2:
+		return rewriteValueAMD64_OpStoreMask64x2(v)
+	case OpStoreMask64x4:
+		return rewriteValueAMD64_OpStoreMask64x4(v)
+	case OpStoreMask64x8:
+		return rewriteValueAMD64_OpStoreMask64x8(v)
+	case OpStoreMask8x16:
+		return rewriteValueAMD64_OpStoreMask8x16(v)
+	case OpStoreMask8x32:
+		return rewriteValueAMD64_OpStoreMask8x32(v)
+	case OpStoreMask8x64:
+		return rewriteValueAMD64_OpStoreMask8x64(v)
	case OpSub16:
		v.Op = OpAMD64SUBL
		return true
@@ -33388,13 +33412,12 @@ func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualInt16x32 x y)
-	// result: (VPMOVMToVec16x32 (VPCMPW512 [0] x y))
+	// result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec16x32)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -33406,13 +33429,12 @@ func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualInt32x16 x y)
-	// result: (VPMOVMToVec32x16 (VPCMPD512 [0] x y))
+	// result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec32x16)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -33424,13 +33446,12 @@ func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualInt64x8 x y)
-	// result: (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
+	// result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec64x8)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -33442,13 +33463,12 @@ func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualInt8x64 x y)
-	// result: (VPMOVMToVec8x64 (VPCMPB512 [0] x y))
+	// result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec8x64)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -34120,13 +34140,12 @@ func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualUint16x32 x y)
-	// result: (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
+	// result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec16x32)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -34138,13 +34157,12 @@ func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualUint32x16 x y)
-	// result: (VPMOVMToVec32x16 (VPCMPUD512 [0] x y))
+	// result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec32x16)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -34156,13 +34174,12 @@ func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualUint64x8 x y)
-	// result: (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y))
+	// result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec64x8)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -34174,13 +34191,12 @@ func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (EqualUint8x64 x y)
-	// result: (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
+	// result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec8x64)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
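Taken together, these eight rewrites drop the generic predicate-immediate compare in favor of the dedicated equality instruction, which is why the [0] aux constant disappears. Summarized in the rules notation of the match/result comments above (a paraphrase of those comments, not the literal .rules source):

    (EqualInt16x32  x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))   // was (VPCMPW512  [0] x y)
    (EqualInt32x16  x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))   // was (VPCMPD512  [0] x y)
    (EqualInt64x8   x y) => (VPMOVMToVec64x8  (VPCMPEQQ512 x y))   // was (VPCMPQ512  [0] x y)
    (EqualInt8x64   x y) => (VPMOVMToVec8x64  (VPCMPEQB512 x y))   // was (VPCMPB512  [0] x y)
    (EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))   // was (VPCMPUW512 [0] x y)
    (EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))   // was (VPCMPUD512 [0] x y)
    (EqualUint64x8  x y) => (VPMOVMToVec64x8  (VPCMPEQQ512 x y))   // was (VPCMPUQ512 [0] x y)
    (EqualUint8x64  x y) => (VPMOVMToVec8x64  (VPCMPEQB512 x y))   // was (VPCMPUB512 [0] x y)

The unsigned variants can share the signed VPCMPEQ forms because bitwise equality does not depend on signedness.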
@@ -36279,13 +36295,12 @@ func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (GreaterInt16x32 x y)
-	// result: (VPMOVMToVec16x32 (VPCMPW512 [14] x y))
+	// result: (VPMOVMToVec16x32 (VPCMPGTW512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec16x32)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(14)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -36297,13 +36312,12 @@ func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (GreaterInt32x16 x y)
-	// result: (VPMOVMToVec32x16 (VPCMPD512 [14] x y))
+	// result: (VPMOVMToVec32x16 (VPCMPGTD512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec32x16)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(14)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -36315,13 +36329,12 @@ func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (GreaterInt64x8 x y)
-	// result: (VPMOVMToVec64x8 (VPCMPQ512 [14] x y))
+	// result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec64x8)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(14)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
@@ -36333,13 +36346,12 @@ func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (GreaterInt8x64 x y)
-	// result: (VPMOVMToVec8x64 (VPCMPB512 [14] x y))
+	// result: (VPMOVMToVec8x64 (VPCMPGTB512 x y))
	for {
		x := v_0
		y := v_1
		v.reset(OpAMD64VPMOVMToVec8x64)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-		v0.AuxInt = int8ToAuxInt(14)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask)
		v0.AddArg2(x, y)
		v.AddArg(v0)
		return true
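The Greater rewrites follow the same shape, trading the [14] predicate immediate these rules used for signed greater-than for the dedicated VPCMPGT instructions; in the same notation:

    (GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPGTW512 x y))  // was (VPCMPW512 [14] x y)
    (GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPGTD512 x y))  // was (VPCMPD512 [14] x y)
    (GreaterInt64x8  x y) => (VPMOVMToVec64x8  (VPCMPGTQ512 x y))  // was (VPCMPQ512 [14] x y)
    (GreaterInt8x64  x y) => (VPMOVMToVec8x64  (VPCMPGTB512 x y))  // was (VPCMPB512 [14] x y)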
@@ -53277,6 +53289,234 @@ func rewriteValueAMD64_OpStore(v *Value) bool {
	}
	return false
}
+func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask16x16 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask16x32 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask16x8 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask32x16 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask32x4 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask32x8 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask64x2 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask64x4 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask64x8 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask8x16 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask8x32 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
+func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMask8x64 {t} ptr val mem)
+	// result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpAMD64KMOVQstore)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
+		v0.AddArg(val)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+}
func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool {
	v_2 := v.Args[2]
	v_1 := v.Args[1]
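Each of these functions lowers one generic StoreMask op into a two-instruction sequence: a vector-to-mask-register move followed by a 64-bit mask store. For Mask16x16.StoreToBits the generated code should look roughly like the sketch below; the concrete mnemonics and registers are my reading of what VPMOVVec16x16ToM and KMOVQstore assemble to, not something stated in this CL:

    VPMOVW2M Y1, K1    // VPMOVVec16x16ToM: collapse each 16-bit lane's sign bit into mask register K1
    KMOVQ    K1, (AX)  // KMOVQstore: write the 64-bit mask register to the destination pointer

Using KMOVQstore uniformly keeps the rule count down; it also suggests the destination is always a 64-bit word, with shapes that have fewer lanes simply leaving the high mask bits zero.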
@@ -1791,6 +1791,23 @@ func simdLoadMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	}
}

+func simdStoreMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+		opCodes := map[int]map[int]ssa.Op{
+			8:  {16: ssa.OpStoreMask8x16, 32: ssa.OpStoreMask8x32, 64: ssa.OpStoreMask8x64},
+			16: {8: ssa.OpStoreMask16x8, 16: ssa.OpStoreMask16x16, 32: ssa.OpStoreMask16x32},
+			32: {4: ssa.OpStoreMask32x4, 8: ssa.OpStoreMask32x8, 16: ssa.OpStoreMask32x16},
+			64: {2: ssa.OpStoreMask64x2, 4: ssa.OpStoreMask64x4, 8: ssa.OpStoreMask64x8},
+		}
+		op := opCodes[elemBits][lanes]
+		if op == 0 {
+			panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
+		}
+		s.vars[memVar] = s.newValue3A(op, types.TypeMem, types.TypeMask, args[1], args[0], s.mem())
+		return nil
+	}
+}
+
// findIntrinsic returns a function which builds the SSA equivalent of the
// function identified by the symbol sym. If sym is not an intrinsic call, returns nil.
func findIntrinsic(sym *types.Sym) intrinsicBuilder {
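A note on the op == 0 guard in simdStoreMask (and its simdLoadMask twin): indexing a Go map with a missing key yields the zero value, and for a nested map a missing outer key yields a nil inner map that also reads as zero, so a single comparison catches both failure modes (the zero ssa.Op value is the invalid opcode). A standalone illustration of that language property, with made-up numbers standing in for the opcode values:

    package main

    import "fmt"

    func main() {
    	opCodes := map[int]map[int]int{
    		8: {16: 101},
    	}
    	fmt.Println(opCodes[8][16]) // 101: both keys present
    	fmt.Println(opCodes[8][99]) // 0: inner key missing
    	fmt.Println(opCodes[7][16]) // 0: outer key missing; the nil inner map reads as zero
    }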
@@ -310,34 +310,34 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
	addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Int16x8.Equal", opLen2(ssa.OpEqualInt16x8, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int16x16.Equal", opLen2(ssa.OpEqualInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Int32x4.Equal", opLen2(ssa.OpEqualInt32x4, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64)
	addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64)
	addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
	addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
@@ -458,22 +458,22 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
	addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64)
	addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64)
	addF(simdPackage, "Float32x16.Greater", opLen2(ssa.OpGreaterFloat32x16, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64)
	addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
	addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
	addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
	addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64)
@ -2137,59 +2137,71 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
|
addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask8x16.StoreToBits", simdStoreMask(8, 16), sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.AsMask8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int8x32.AsMask8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
|
addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask8x32.StoreToBits", simdStoreMask(8, 32), sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int8x64.AsMask8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int8x64.AsMask8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
|
addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask8x64.StoreToBits", simdStoreMask(8, 64), sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.AsMask16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int16x8.AsMask16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
|
addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask16x8.StoreToBits", simdStoreMask(16, 8), sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
|
addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask16x16.StoreToBits", simdStoreMask(16, 16), sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int16x32.AsMask16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int16x32.AsMask16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
|
addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask16x32.StoreToBits", simdStoreMask(16, 32), sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.AsMask32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int32x4.AsMask32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
|
addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask32x4.StoreToBits", simdStoreMask(32, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.AsMask32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int32x8.AsMask32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
|
addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask32x8.StoreToBits", simdStoreMask(32, 8), sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.AsMask32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int32x16.AsMask32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
|
addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask32x16.StoreToBits", simdStoreMask(32, 16), sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int64x2.AsMask64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int64x2.AsMask64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
|
addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask64x2.StoreToBits", simdStoreMask(64, 2), sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int64x4.AsMask64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int64x4.AsMask64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
|
addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask64x4.StoreToBits", simdStoreMask(64, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Int64x8.AsMask64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Int64x8.AsMask64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
|
addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
|
||||||
|
addF(simdPackage, "Mask64x8.StoreToBits", simdStoreMask(64, 8), sys.AMD64)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
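For orientation, a minimal sketch of what the new StoreToBits registrations enable at the source level. LoadInt32x16Slice is an assumed constructor, named by analogy with the LoadInt32x4Slice used in the tests further down, and the import path is the experimental simd package this commit extends:

package p

import "simd"

// maskBits compares two 512-bit vectors and extracts the per-lane
// results as a bitmap via the newly registered Mask32x16.StoreToBits
// intrinsic, which lowers to the simdStoreMask(32, 16) op above.
func maskBits(a, b []int32) uint64 {
	x := simd.LoadInt32x16Slice(a) // assumed constructor
	y := simd.LoadInt32x16Slice(b)
	m := x.Equal(y) // Mask32x16
	var bits uint64
	m.StoreToBits(&bits)
	return bits
}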
@@ -918,12 +918,12 @@ func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VRCP14PS, CPU Feature: AVX512F
+// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x4) ApproximateReciprocal() Float32x4

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VRCP14PS, CPU Feature: AVX512F
+// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x8) ApproximateReciprocal() Float32x8

// ApproximateReciprocal computes an approximate reciprocal of each element.
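The instruction swap above trades precision for availability: VRCP14PS (AVX512F) guarantees a relative error below 2^-14, while VRCPPS (plain AVX) only guarantees about 1.5 * 2^-12. A scalar sketch of the usual compensation, one Newton-Raphson step, which callers can apply when the cheaper approximation is not accurate enough:

// refineRecip sharpens an approximate reciprocal r of a by one
// Newton-Raphson step, r' = r*(2 - a*r), roughly doubling the number
// of correct bits.
func refineRecip(a, r float32) float32 {
	return r * (2 - a*r)
}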
@@ -1951,6 +1951,11 @@ func (x Int8x16) Equal(y Int8x16) Mask8x16
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Int8x32) Equal(y Int8x32) Mask8x32

+// Equal compares for equality.
+//
+// Asm: VPCMPEQB, CPU Feature: AVX512BW
+func (x Int8x64) Equal(y Int8x64) Mask8x64
+
// Equal compares for equality.
//
// Asm: VPCMPEQW, CPU Feature: AVX
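A scalar model of the added 512-bit Equal, to make the mask semantics concrete. At the instruction level the AVX/AVX2 compares materialize a vector of all-ones or all-zero lanes, while the AVX-512 forms write one bit per lane to a mask register:

// equalBits models Int8x64.Equal lane by lane: bit i of the mask is
// set exactly when x[i] == y[i].
func equalBits(x, y [64]int8) (mask uint64) {
	for i := range x {
		if x[i] == y[i] {
			mask |= 1 << i
		}
	}
	return mask
}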
@@ -1961,6 +1966,11 @@ func (x Int16x8) Equal(y Int16x8) Mask16x8
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Int16x16) Equal(y Int16x16) Mask16x16

+// Equal compares for equality.
+//
+// Asm: VPCMPEQW, CPU Feature: AVX512BW
+func (x Int16x32) Equal(y Int16x32) Mask16x32
+
// Equal compares for equality.
//
// Asm: VPCMPEQD, CPU Feature: AVX
@@ -1971,6 +1981,11 @@ func (x Int32x4) Equal(y Int32x4) Mask32x4
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Int32x8) Equal(y Int32x8) Mask32x8

+// Equal compares for equality.
+//
+// Asm: VPCMPEQD, CPU Feature: AVX512F
+func (x Int32x16) Equal(y Int32x16) Mask32x16
+
// Equal compares for equality.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
@@ -1981,6 +1996,11 @@ func (x Int64x2) Equal(y Int64x2) Mask64x2
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Int64x4) Equal(y Int64x4) Mask64x4

+// Equal compares for equality.
+//
+// Asm: VPCMPEQQ, CPU Feature: AVX512F
+func (x Int64x8) Equal(y Int64x8) Mask64x8
+
// Equal compares for equality.
//
// Asm: VPCMPEQB, CPU Feature: AVX
@@ -1991,6 +2011,11 @@ func (x Uint8x16) Equal(y Uint8x16) Mask8x16
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Uint8x32) Equal(y Uint8x32) Mask8x32

+// Equal compares for equality.
+//
+// Asm: VPCMPEQB, CPU Feature: AVX512BW
+func (x Uint8x64) Equal(y Uint8x64) Mask8x64
+
// Equal compares for equality.
//
// Asm: VPCMPEQW, CPU Feature: AVX
@@ -2001,6 +2026,11 @@ func (x Uint16x8) Equal(y Uint16x8) Mask16x8
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Uint16x16) Equal(y Uint16x16) Mask16x16

+// Equal compares for equality.
+//
+// Asm: VPCMPEQW, CPU Feature: AVX512BW
+func (x Uint16x32) Equal(y Uint16x32) Mask16x32
+
// Equal compares for equality.
//
// Asm: VPCMPEQD, CPU Feature: AVX
@@ -2011,6 +2041,11 @@ func (x Uint32x4) Equal(y Uint32x4) Mask32x4
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Uint32x8) Equal(y Uint32x8) Mask32x8

+// Equal compares for equality.
+//
+// Asm: VPCMPEQD, CPU Feature: AVX512F
+func (x Uint32x16) Equal(y Uint32x16) Mask32x16
+
// Equal compares for equality.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
@@ -2021,6 +2056,11 @@ func (x Uint64x2) Equal(y Uint64x2) Mask64x2
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Uint64x4) Equal(y Uint64x4) Mask64x4

+// Equal compares for equality.
+//
+// Asm: VPCMPEQQ, CPU Feature: AVX512F
+func (x Uint64x8) Equal(y Uint64x8) Mask64x8
+
// Equal compares for equality.
//
// Asm: VCMPPS, CPU Feature: AVX
@@ -2051,46 +2091,6 @@ func (x Float64x4) Equal(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) Equal(y Float64x8) Mask64x8

-// Equal compares for equality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) Equal(y Int8x64) Mask8x64
-
-// Equal compares for equality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) Equal(y Int16x32) Mask16x32
-
-// Equal compares for equality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) Equal(y Int32x16) Mask32x16
-
-// Equal compares for equality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) Equal(y Int64x8) Mask64x8
-
-// Equal compares for equality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) Equal(y Uint8x64) Mask8x64
-
-// Equal compares for equality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) Equal(y Uint16x32) Mask16x32
-
-// Equal compares for equality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) Equal(y Uint32x16) Mask32x16
-
-// Equal compares for equality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) Equal(y Uint64x8) Mask64x8
-
/* EqualMasked */

// EqualMasked compares for equality.
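Context for the deletions above: VPCMP and VPCMPU take an imm8 comparison predicate, and the equality case duplicates what the dedicated VPCMPEQ* forms in the declarations added earlier encode without an immediate. The predicate values, per the Intel SDM:

// imm8 predicates for VPCMP[U]{B,W,D,Q}; 0 is the equality case that
// the VPCMPEQ*-based declarations above now cover directly.
const (
	cmpEQ    = 0 // equal
	cmpLT    = 1 // less than
	cmpLE    = 2 // less or equal
	cmpFalse = 3 // always false
	cmpNE    = 4 // not equal
	cmpNLT   = 5 // not less than (greater or equal)
	cmpNLE   = 6 // not less or equal (greater than)
	cmpTrue  = 7 // always true
)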
@@ -2733,7 +2733,7 @@ func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x6
// b is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, m Mask8x16) Uint8x16
+func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16

// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
@@ -2746,7 +2746,7 @@ func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, m
// b is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, m Mask8x32) Uint8x32
+func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32

// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
@@ -2759,7 +2759,7 @@ func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, m
// b is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, m Mask8x64) Uint8x64
+func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64

/* GaloisFieldAffineTransformMasked */

@@ -2773,7 +2773,7 @@ func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, m
// b is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, m Mask8x16) Uint8x16
+func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16

// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
@@ -2785,7 +2785,7 @@ func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, m Mask8x
// b is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, m Mask8x32) Uint8x32
+func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32

// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
@@ -2797,7 +2797,7 @@ func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, m Mask8x
// b is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, m Mask8x64) Uint8x64
+func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64

/* GaloisFieldMul */

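The hunks above are a parameter rename only (m becomes mask, matching the other Masked methods). As a reminder of what the mask argument does, a scalar sketch of the lane selection, assuming the zeroing behavior of AVX-512 masked operations (masked-off lanes read as zero) and with the GF(2^8) arithmetic itself elided:

// maskedLanes models only the masking: lanes whose mask bit is set
// keep the computed result, the rest are zeroed.
func maskedLanes(result [16]uint8, mask uint16) (out [16]uint8) {
	for i := range result {
		if mask&(1<<i) != 0 {
			out[i] = result[i]
		}
	}
	return out
}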
@@ -2987,6 +2987,11 @@ func (x Int8x16) Greater(y Int8x16) Mask8x16
// Asm: VPCMPGTB, CPU Feature: AVX2
func (x Int8x32) Greater(y Int8x32) Mask8x32

+// Greater compares for greater than.
+//
+// Asm: VPCMPGTB, CPU Feature: AVX512BW
+func (x Int8x64) Greater(y Int8x64) Mask8x64
+
// Greater compares for greater than.
//
// Asm: VPCMPGTW, CPU Feature: AVX
@@ -2997,6 +3002,11 @@ func (x Int16x8) Greater(y Int16x8) Mask16x8
// Asm: VPCMPGTW, CPU Feature: AVX2
func (x Int16x16) Greater(y Int16x16) Mask16x16

+// Greater compares for greater than.
+//
+// Asm: VPCMPGTW, CPU Feature: AVX512BW
+func (x Int16x32) Greater(y Int16x32) Mask16x32
+
// Greater compares for greater than.
//
// Asm: VPCMPGTD, CPU Feature: AVX
@@ -3007,6 +3017,11 @@ func (x Int32x4) Greater(y Int32x4) Mask32x4
// Asm: VPCMPGTD, CPU Feature: AVX2
func (x Int32x8) Greater(y Int32x8) Mask32x8

+// Greater compares for greater than.
+//
+// Asm: VPCMPGTD, CPU Feature: AVX512F
+func (x Int32x16) Greater(y Int32x16) Mask32x16
+
// Greater compares for greater than.
//
// Asm: VPCMPGTQ, CPU Feature: AVX
@@ -3017,6 +3032,11 @@ func (x Int64x2) Greater(y Int64x2) Mask64x2
// Asm: VPCMPGTQ, CPU Feature: AVX2
func (x Int64x4) Greater(y Int64x4) Mask64x4

+// Greater compares for greater than.
+//
+// Asm: VPCMPGTQ, CPU Feature: AVX512F
+func (x Int64x8) Greater(y Int64x8) Mask64x8
+
// Greater compares for greater than.
//
// Asm: VCMPPS, CPU Feature: AVX
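A scalar model of the added Int64x8.Greater. VPCMPGT* is a signed comparison, which is why the unsigned types keep their separate VPCMPU*-based lowering below:

// greaterBits models Int64x8.Greater: a signed per-lane compare whose
// eight results become one bit each in the mask.
func greaterBits(x, y [8]int64) (mask uint8) {
	for i := range x {
		if x[i] > y[i] {
			mask |= 1 << i
		}
	}
	return mask
}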
@@ -3047,26 +3067,6 @@ func (x Float64x4) Greater(y Float64x4) Mask64x4
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) Greater(y Float64x8) Mask64x8

-// Greater compares for greater than.
-//
-// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) Greater(y Int8x64) Mask8x64
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) Greater(y Int16x32) Mask16x32
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) Greater(y Int32x16) Mask32x16
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) Greater(y Int64x8) Mask64x8
-
// Greater compares for greater than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
@@ -6475,84 +6475,84 @@ func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8

/* Permute */

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x16) Permute(indices Uint8x16) Int8x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x16) Permute(indices Uint8x16) Uint8x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x32) Permute(indices Uint8x32) Int8x32

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x32) Permute(indices Uint8x32) Uint8x32

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x64) Permute(indices Uint8x64) Int8x64

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x64) Permute(indices Uint8x64) Uint8x64

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x8) Permute(indices Uint16x8) Int16x8

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x8) Permute(indices Uint16x8) Uint16x8

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x16) Permute(indices Uint16x16) Int16x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x16) Permute(indices Uint16x16) Uint16x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x32) Permute(indices Uint16x32) Int16x32

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
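A scalar reading of the corrected comment, showing why it is x (the receiver), not y, that gets permuted; the index masking reflects the "only the needed bits" sentence:

// permute models Int8x16.Permute: lane i of the result selects a lane
// of x; only the low 4 index bits matter because len(x) == 16.
func permute(x [16]int8, indices [16]uint8) (r [16]int8) {
	for i := range r {
		r[i] = x[indices[i]&15]
	}
	return r
}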
@@ -6580,63 +6580,63 @@ func (x Int32x8) Permute(indices Uint32x8) Int32x8
// Asm: VPERMD, CPU Feature: AVX2
func (x Uint32x8) Permute(indices Uint32x8) Uint32x8

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPS, CPU Feature: AVX512F
func (x Float32x16) Permute(indices Uint32x16) Float32x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMD, CPU Feature: AVX512F
func (x Int32x16) Permute(indices Uint32x16) Int32x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMD, CPU Feature: AVX512F
func (x Uint32x16) Permute(indices Uint32x16) Uint32x16

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPD, CPU Feature: AVX512F
func (x Float64x4) Permute(indices Uint64x4) Float64x4

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Int64x4) Permute(indices Uint64x4) Int64x4

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Uint64x4) Permute(indices Uint64x4) Uint64x4

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPD, CPU Feature: AVX512F
func (x Float64x8) Permute(indices Uint64x8) Float64x8

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Int64x8) Permute(indices Uint64x8) Int64x8

-// Permute performs a full permutation of vector y using indices:
+// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
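A usage sketch, reversing an 8-lane vector. LoadFloat64x8Slice and LoadUint64x8Slice are assumed constructors, named by analogy with the LoadInt32x4Slice used in the tests below:

package p

import "simd"

// reverse8 reverses the lanes of an 8-wide float64 vector with a full
// permutation.
func reverse8(v []float64) simd.Float64x8 {
	x := simd.LoadFloat64x8Slice(v) // assumed constructor
	idx := simd.LoadUint64x8Slice([]uint64{7, 6, 5, 4, 3, 2, 1, 0})
	return x.Permute(idx) // lane i of the result is x[idx[i]]
}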
@@ -7189,7 +7189,7 @@ func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Ui

/* PermuteMasked */

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7198,7 +7198,7 @@ func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Ui
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7207,7 +7207,7 @@ func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7216,7 +7216,7 @@ func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7225,7 +7225,7 @@ func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7234,7 +7234,7 @@ func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7243,7 +7243,7 @@ func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7252,7 +7252,7 @@ func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7261,7 +7261,7 @@ func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7270,7 +7270,7 @@ func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7279,7 +7279,7 @@ func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7288,7 +7288,7 @@ func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7297,7 +7297,7 @@ func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7306,7 +7306,7 @@ func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32
// Asm: VPERMPS, CPU Feature: AVX512F
func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7315,7 +7315,7 @@ func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8
// Asm: VPERMD, CPU Feature: AVX512F
func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7324,7 +7324,7 @@ func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8
// Asm: VPERMD, CPU Feature: AVX512F
func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7333,7 +7333,7 @@ func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8
// Asm: VPERMPS, CPU Feature: AVX512F
func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7342,7 +7342,7 @@ func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16
// Asm: VPERMD, CPU Feature: AVX512F
func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7351,7 +7351,7 @@ func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
// Asm: VPERMD, CPU Feature: AVX512F
func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7360,7 +7360,7 @@ func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
// Asm: VPERMPD, CPU Feature: AVX512F
func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7369,7 +7369,7 @@ func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7378,7 +7378,7 @@ func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7387,7 +7387,7 @@ func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
// Asm: VPERMPD, CPU Feature: AVX512F
func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
@@ -7396,7 +7396,7 @@ func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8

-// PermuteMasked performs a full permutation of vector y using indices:
+// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
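The same comment correction applies throughout PermuteMasked. Its scalar shape combines the permutation above with per-lane masking; this sketch again assumes zeroing semantics for masked-off lanes:

// permuteMasked models Int8x16.PermuteMasked: masked-off lanes are
// zeroed, the rest pick x[indices[i]] using only the needed index bits.
func permuteMasked(x [16]int8, indices [16]uint8, mask uint16) (r [16]int8) {
	for i := range r {
		if mask&(1<<i) != 0 {
			r[i] = x[indices[i]&15]
		}
	}
	return r
}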
@@ -461,7 +461,7 @@ func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) si
	}
}

-func TestBitMask(t *testing.T) {
+func TestBitMaskLoad(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
@@ -477,3 +477,19 @@ func TestBitMask(t *testing.T) {
		}
	}
}
+
+func TestBitMaskStore(t *testing.T) {
+	if !simd.HasAVX512() {
+		t.Skip("Test requires HasAVX512, not available on this hardware")
+		return
+	}
+	var want uint64 = 0b101
+	var got uint64
+	x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
+	y := simd.LoadInt32x4Slice([]int32{5, 0, 5, 0})
+	m := y.Greater(x)
+	m.StoreToBits(&got)
+	if got != want {
+		t.Errorf("Result incorrect: want %b, got %b", want, got)
+	}
+}
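Why 0b101: bit i of the stored bitmap is the result of lane i's compare, least significant bit first, so {5>1, 0>2, 5>3, 0>4} gives bits 1, 0, 1, 0. In scalar form:

// expectedBits recomputes TestBitMaskStore's want value.
func expectedBits(x, y []int32) (bits uint64) {
	for i := range x {
		if y[i] > x[i] {
			bits |= 1 << i
		}
	}
	return bits // {1,2,3,4} vs {5,0,5,0} -> 0b0101 == 0b101
}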
@ -205,48 +205,88 @@ type Mask8x16 struct {
|
||||||
vals [16]int8
|
vals [16]int8
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask8x16FromBits constructs a Mask8x16 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
|
// LoadMask8x16FromBits constructs a Mask8x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
|
||||||
// Only the lower 16 bits of y are used.
|
// Only the lower 16 bits of y are used.
|
||||||
//
|
//
|
||||||
|
// CPU Features: AVX512
|
||||||
|
//
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func LoadMask8x16FromBits(y *uint64) Mask8x16
|
func LoadMask8x16FromBits(y *uint64) Mask8x16
|
||||||
|
|
||||||
|
// StoreToBits stores a Mask8x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
|
||||||
|
// Only the lower 16 bits of y are used.
|
||||||
|
//
|
||||||
|
// CPU Features: AVX512
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func (x Mask8x16) StoreToBits(y *uint64)
|
||||||
|
|
||||||
// Mask16x8 is a 128-bit SIMD vector of 8 int16
|
// Mask16x8 is a 128-bit SIMD vector of 8 int16
|
||||||
type Mask16x8 struct {
|
type Mask16x8 struct {
|
||||||
int16x8 v128
|
int16x8 v128
|
||||||
vals [8]int16
|
vals [8]int16
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask16x8FromBits constructs a Mask16x8 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
|
// LoadMask16x8FromBits constructs a Mask16x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
|
||||||
// Only the lower 8 bits of y are used.
|
// Only the lower 8 bits of y are used.
|
||||||
//
|
//
|
||||||
|
// CPU Features: AVX512
|
||||||
|
//
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func LoadMask16x8FromBits(y *uint64) Mask16x8
|
func LoadMask16x8FromBits(y *uint64) Mask16x8
|
||||||
|
|
||||||
|
// StoreToBits stores a Mask16x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
|
||||||
|
// Only the lower 8 bits of y are used.
|
||||||
|
//
|
||||||
|
// CPU Features: AVX512
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func (x Mask16x8) StoreToBits(y *uint64)
|
||||||
|
|
||||||
// Mask32x4 is a 128-bit SIMD vector of 4 int32
|
// Mask32x4 is a 128-bit SIMD vector of 4 int32
|
||||||
type Mask32x4 struct {
|
type Mask32x4 struct {
|
||||||
int32x4 v128
|
int32x4 v128
|
||||||
vals [4]int32
|
vals [4]int32
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask32x4FromBits constructs a Mask32x4 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
|
// LoadMask32x4FromBits constructs a Mask32x4 from a bitmap, where 1 means set for the indexed element, 0 means unset.
|
||||||
// Only the lower 4 bits of y are used.
|
// Only the lower 4 bits of y are used.
|
||||||
//
|
//
|
||||||
|
// CPU Features: AVX512
|
||||||
|
//
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func LoadMask32x4FromBits(y *uint64) Mask32x4
|
func LoadMask32x4FromBits(y *uint64) Mask32x4
|
||||||
|
|
||||||
|
// StoreToBits stores a Mask32x4 as a bitmap, where 1 means set for the indexed element, 0 means unset.
|
||||||
|
// Only the lower 4 bits of y are used.
|
||||||
|
//
|
||||||
|
// CPU Features: AVX512
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func (x Mask32x4) StoreToBits(y *uint64)
|
||||||
|
|
 // Mask64x2 is a 128-bit SIMD vector of 2 int64
 type Mask64x2 struct {
 	int64x2 v128
 	vals    [2]int64
 }
 
-// Mask64x2FromBits constructs a Mask64x2 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask64x2FromBits constructs a Mask64x2 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 2 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask64x2FromBits(y *uint64) Mask64x2
+
+// StoreToBits stores a Mask64x2 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 2 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask64x2) StoreToBits(y *uint64)
 
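The "only the lower N bits" clauses above imply that bits past the lane count are simply ignored. Here is a sketch of that expected truncation for the 4-lane Mask32x4; the behavior is inferred from the doc comments rather than a verified output, under the same toolchain assumptions as the previous example.

// truncatesHighBits illustrates the documented masking of high bits.
func truncatesHighBits() bool {
	in := uint64(0b1_0110) // bit 4 has no corresponding lane in a Mask32x4
	m := simd.LoadMask32x4FromBits(&in)

	var out uint64
	m.StoreToBits(&out)
	return out == 0b0110 // expected, since only the lower 4 bits of in are used
}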
 // v256 is a tag type that tells the compiler that this is really 256-bit SIMD
 type v256 struct {
 	_256 struct{}
 
@@ -448,48 +488,88 @@ type Mask8x32 struct {
 	vals    [32]int8
 }
 
-// Mask8x32FromBits constructs a Mask8x32 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask8x32FromBits constructs a Mask8x32 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 32 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask8x32FromBits(y *uint64) Mask8x32
+
+// StoreToBits stores a Mask8x32 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 32 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask8x32) StoreToBits(y *uint64)
 
 // Mask16x16 is a 256-bit SIMD vector of 16 int16
 type Mask16x16 struct {
 	int16x16 v256
 	vals     [16]int16
 }
 
-// Mask16x16FromBits constructs a Mask16x16 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask16x16FromBits constructs a Mask16x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 16 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask16x16FromBits(y *uint64) Mask16x16
+
+// StoreToBits stores a Mask16x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 16 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask16x16) StoreToBits(y *uint64)
 
 // Mask32x8 is a 256-bit SIMD vector of 8 int32
 type Mask32x8 struct {
 	int32x8 v256
 	vals    [8]int32
 }
 
-// Mask32x8FromBits constructs a Mask32x8 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask32x8FromBits constructs a Mask32x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 8 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask32x8FromBits(y *uint64) Mask32x8
+
+// StoreToBits stores a Mask32x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 8 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask32x8) StoreToBits(y *uint64)
 
 // Mask64x4 is a 256-bit SIMD vector of 4 int64
 type Mask64x4 struct {
 	int64x4 v256
 	vals    [4]int64
 }
 
-// Mask64x4FromBits constructs a Mask64x4 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask64x4FromBits constructs a Mask64x4 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 4 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask64x4FromBits(y *uint64) Mask64x4
+
+// StoreToBits stores a Mask64x4 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 4 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask64x4) StoreToBits(y *uint64)
 
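Because StoreToBits writes one bit per lane, the resulting word composes directly with math/bits, for example to count how many lanes a mask selects. A hedged sketch using the 256-bit Mask64x4 follows; countSelected is an illustrative helper, not an API added by this change.

import "math/bits"

// countSelected reports how many lanes of m are set, via the
// bitmap written by StoreToBits.
func countSelected(m simd.Mask64x4) int {
	var b uint64
	m.StoreToBits(&b)
	return bits.OnesCount64(b) // at most 4 for a 4-lane mask
}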
 // v512 is a tag type that tells the compiler that this is really 512-bit SIMD
 type v512 struct {
 	_512 struct{}
 
@@ -691,44 +771,84 @@ type Mask8x64 struct {
 	vals    [64]int8
 }
 
-// Mask8x64FromBits constructs a Mask8x64 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask8x64FromBits constructs a Mask8x64 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 64 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask8x64FromBits(y *uint64) Mask8x64
+
+// StoreToBits stores a Mask8x64 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 64 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask8x64) StoreToBits(y *uint64)
 
 // Mask16x32 is a 512-bit SIMD vector of 32 int16
 type Mask16x32 struct {
 	int16x32 v512
 	vals     [32]int16
 }
 
-// Mask16x32FromBits constructs a Mask16x32 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask16x32FromBits constructs a Mask16x32 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 32 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask16x32FromBits(y *uint64) Mask16x32
+
+// StoreToBits stores a Mask16x32 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 32 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask16x32) StoreToBits(y *uint64)
 
 // Mask32x16 is a 512-bit SIMD vector of 16 int32
 type Mask32x16 struct {
 	int32x16 v512
 	vals     [16]int32
 }
 
-// Mask32x16FromBits constructs a Mask32x16 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask32x16FromBits constructs a Mask32x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 16 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask32x16FromBits(y *uint64) Mask32x16
+
+// StoreToBits stores a Mask32x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 16 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask32x16) StoreToBits(y *uint64)
 
 // Mask64x8 is a 512-bit SIMD vector of 8 int64
 type Mask64x8 struct {
 	int64x8 v512
 	vals    [8]int64
 }
 
-// Mask64x8FromBits constructs a Mask64x8 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// LoadMask64x8FromBits constructs a Mask64x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
 // Only the lower 8 bits of y are used.
 //
+// CPU Features: AVX512
+//
 //go:noescape
 func LoadMask64x8FromBits(y *uint64) Mask64x8
+
+// StoreToBits stores a Mask64x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 8 bits of y are used.
+//
+// CPU Features: AVX512
+//
+//go:noescape
+func (x Mask64x8) StoreToBits(y *uint64)
 
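Mask8x64 is the one shape whose bitmap occupies the full uint64, so no high bits are discarded. A final sketch under the same assumptions as the earlier examples:

// fullWidthRoundTrip illustrates that all 64 bits participate for Mask8x64.
func fullWidthRoundTrip() bool {
	in := ^uint64(0) // select all 64 lanes
	m := simd.LoadMask8x64FromBits(&in)

	var out uint64
	m.StoreToBits(&out)
	return out == in // expected: every bit is used for Mask8x64
}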
||||||
Loading…
Add table
Add a link
Reference in a new issue