[dev.simd] cmd/compile, simd: add VPTEST
Change-Id: Ia5103100eca2747fd10917ee2f32e3403e68e844
Reviewed-on: https://go-review.googlesource.com/c/go/+/702175
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
parent d9751166a6
commit f1e3651c33
9 changed files with 236 additions and 16 deletions
@@ -1845,6 +1845,14 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.From.Reg = v.Args[0].Reg()
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
+	case ssa.OpAMD64VPTEST:
+		// Some instructions setting flags put their second operand into the destination reg.
+		// See also CMP[BWDQ].
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = simdReg(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = simdReg(v.Args[1])
 
 	default:
 		if !ssaGenSIMDValue(s, v) {
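Net effect of this case: an IsZeroVec value lowers to a single VPTEST whose flags feed a SETcc. A sketch of the expected amd64 output for a 128-bit IsZero (register choices are illustrative, not guaranteed by the allocator):

	VPTEST X0, X0   // ZF := ((X0 AND X0) == 0), so ZF is set iff X0 is all zeros
	SETEQ AX        // materialize ZF as the bool result
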
@@ -1732,6 +1732,9 @@
 (StoreMasked64 {t} ptr mask val mem) && t.Size() == 16 => (VPMASK64store128 ptr mask val mem)
 (StoreMasked64 {t} ptr mask val mem) && t.Size() == 32 => (VPMASK64store256 ptr mask val mem)
 
+// Misc
+(IsZeroVec x) => (SETEQ (VPTEST x x))
+
 // SIMD vector K-masked loads and stores
 
 (LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
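The rule is sound because VPTEST sets ZF from the AND of its two operands and CF from the AND-NOT; passing x for both operands makes ZF test whether x is all zeros, and SETEQ reads ZF. A scalar Go model of the flag computation (a sketch for a 128-bit value split into two uint64 halves, not compiler code):

	// vptestFlags models VPTEST's two flag outputs.
	func vptestFlags(x, y [2]uint64) (zf, cf bool) {
		zf = x[0]&y[0] == 0 && x[1]&y[1] == 0   // ZF: x AND y == 0
		cf = ^x[0]&y[0] == 0 && ^x[1]&y[1] == 0 // CF: (NOT x) AND y == 0
		return
	}

With y = x, zf is true exactly when every bit of x is zero, which is what IsZeroVec asks.
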
@@ -212,22 +212,23 @@ func init() {
 		vloadk  = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
 		vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
 
-		v11   = regInfo{inputs: vzonly, outputs: vonly}
-		v21   = regInfo{inputs: []regMask{vz, vz}, outputs: vonly}
-		vk    = regInfo{inputs: vzonly, outputs: maskonly}
-		kv    = regInfo{inputs: maskonly, outputs: vonly}
-		v2k   = regInfo{inputs: []regMask{vz, vz}, outputs: maskonly}
-		vkv   = regInfo{inputs: []regMask{vz, mask}, outputs: vonly}
-		v2kv  = regInfo{inputs: []regMask{vz, vz, mask}, outputs: vonly}
-		v2kk  = regInfo{inputs: []regMask{vz, vz, mask}, outputs: maskonly}
-		v31   = regInfo{inputs: []regMask{v, vz, vz}, outputs: vonly}       // used in resultInArg0 ops, arg0 must not be x15
-		v3kv  = regInfo{inputs: []regMask{v, vz, vz, mask}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
-		vgpv  = regInfo{inputs: []regMask{vz, gp}, outputs: vonly}
-		vgp   = regInfo{inputs: vonly, outputs: gponly}
-		vfpv  = regInfo{inputs: []regMask{vz, fp}, outputs: vonly}
-		vfpkv = regInfo{inputs: []regMask{vz, fp, mask}, outputs: vonly}
-		fpv   = regInfo{inputs: []regMask{fp}, outputs: vonly}
-		gpv   = regInfo{inputs: []regMask{gp}, outputs: vonly}
+		v11     = regInfo{inputs: vzonly, outputs: vonly}
+		v21     = regInfo{inputs: []regMask{vz, vz}, outputs: vonly}
+		vk      = regInfo{inputs: vzonly, outputs: maskonly}
+		kv      = regInfo{inputs: maskonly, outputs: vonly}
+		v2k     = regInfo{inputs: []regMask{vz, vz}, outputs: maskonly}
+		vkv     = regInfo{inputs: []regMask{vz, mask}, outputs: vonly}
+		v2kv    = regInfo{inputs: []regMask{vz, vz, mask}, outputs: vonly}
+		v2kk    = regInfo{inputs: []regMask{vz, vz, mask}, outputs: maskonly}
+		v31     = regInfo{inputs: []regMask{v, vz, vz}, outputs: vonly}       // used in resultInArg0 ops, arg0 must not be x15
+		v3kv    = regInfo{inputs: []regMask{v, vz, vz, mask}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
+		vgpv    = regInfo{inputs: []regMask{vz, gp}, outputs: vonly}
+		vgp     = regInfo{inputs: vonly, outputs: gponly}
+		vfpv    = regInfo{inputs: []regMask{vz, fp}, outputs: vonly}
+		vfpkv   = regInfo{inputs: []regMask{vz, fp, mask}, outputs: vonly}
+		fpv     = regInfo{inputs: []regMask{fp}, outputs: vonly}
+		gpv     = regInfo{inputs: []regMask{gp}, outputs: vonly}
+		v2flags = regInfo{inputs: []regMask{vz, vz}}
 
 		w11 = regInfo{inputs: wzonly, outputs: wonly}
 		w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
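The new v2flags shape reads two vector registers and produces nothing but condition flags, which is why it has no outputs entry. For orientation, the types involved look roughly like this (a sketch of the ops generator's definitions, not a verbatim copy):

	type regMask uint64 // one bit per machine register

	type regInfo struct {
		inputs  []regMask // allowed registers per input, in argument order
		outputs []regMask // allowed registers per result; empty for flags-only ops like v2flags
	}
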
@@ -1426,6 +1427,9 @@ func init() {
 		{name: "KMOVDi", argLength: 1, reg: kgp, asm: "KMOVD"},
 		{name: "KMOVWi", argLength: 1, reg: kgp, asm: "KMOVW"},
 		{name: "KMOVBi", argLength: 1, reg: kgp, asm: "KMOVB"},
+
+		// VPTEST
+		{name: "VPTEST", asm: "VPTEST", argLength: 2, reg: v2flags, clobberFlags: true, typ: "Flags"},
 	}
 
 	var AMD64blocks = []blockData{
@@ -731,6 +731,9 @@ var genericOps = []opData{
 	{name: "CvtMask64x2to8", argLength: 1}, // arg0 = mask
 	{name: "CvtMask64x4to8", argLength: 1}, // arg0 = mask
 	{name: "CvtMask64x8to8", argLength: 1}, // arg0 = mask
+
+	// Returns true if arg0 is all zero.
+	{name: "IsZeroVec", argLength: 1},
 }
 
 // kind controls successors implicit exit
@@ -1236,6 +1236,7 @@ const (
 	OpAMD64KMOVDi
 	OpAMD64KMOVWi
 	OpAMD64KMOVBi
+	OpAMD64VPTEST
 	OpAMD64VADDPD128
 	OpAMD64VADDPD256
 	OpAMD64VADDPD512
@@ -5390,6 +5391,7 @@ const (
 	OpCvtMask64x2to8
 	OpCvtMask64x4to8
 	OpCvtMask64x8to8
+	OpIsZeroVec
 	OpAbsInt8x16
 	OpAbsInt8x32
 	OpAbsInt8x64
@@ -19799,6 +19801,18 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:         "VPTEST",
+		argLen:       2,
+		clobberFlags: true,
+		asm:          x86.AVPTEST,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
 	{
 		name:   "VADDPD128",
 		argLen: 2,
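The 4294901760 in both input masks is 0xFFFF0000, i.e. bits 16 through 31 set, matching the comment's X0 through X15; the low sixteen bits of the mask belong to the general-purpose registers (an inference from the printed value, not a quoted compiler constant). A quick check:

	package main

	import "fmt"

	func main() {
		var m uint64
		for i := 16; i < 32; i++ { // X0..X15 assumed to occupy bits 16..31 of the regMask
			m |= 1 << uint(i)
		}
		fmt.Printf("%d == %#x\n", m, m) // prints 4294901760 == 0xffff0000
	}
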
@@ -75862,6 +75876,11 @@ var opcodeTable = [...]opInfo{
 		argLen:  1,
 		generic: true,
 	},
+	{
+		name:    "IsZeroVec",
+		argLen:  1,
+		generic: true,
+	},
 	{
 		name:    "AbsInt8x16",
 		argLen:  1,
@@ -3599,6 +3599,8 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpIsNonNil(v)
 	case OpIsSliceInBounds:
 		return rewriteValueAMD64_OpIsSliceInBounds(v)
+	case OpIsZeroVec:
+		return rewriteValueAMD64_OpIsZeroVec(v)
 	case OpLeadingZerosInt32x16:
 		v.Op = OpAMD64VPLZCNTD512
 		return true
@@ -53712,6 +53714,20 @@ func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpIsZeroVec(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (IsZeroVec x)
+	// result: (SETEQ (VPTEST x x))
+	for {
+		x := v_0
+		v.reset(OpAMD64SETEQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+		v0.AddArg2(x, x)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueAMD64_OpLeq16(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -1614,6 +1614,22 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
 				return nil
 			},
 			sys.AMD64)
+		addF(simdPackage, "Int8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Int64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+		addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
 	}
 }
 
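Each addF entry maps a method of the simd package onto the generic SSA op; opLen1 builds a one-argument SSA value of the given result type, here bool. A usage sketch built only from calls that appear in this CL's test (the bare "simd" import path is an assumption matching how the test refers to the experimental package on this branch):

	package main

	import (
		"fmt"
		"simd" // experimental package on the dev.simd branch
	)

	func main() {
		a := simd.LoadUint64x2Slice([]uint64{0, 1})
		b := simd.LoadUint64x2Slice([]uint64{0, 0})
		fmt.Println(a.IsZero())        // false: a has a nonzero element
		fmt.Println(b.IsZero())        // true
		fmt.Println(a.And(b).IsZero()) // true; per the doc comments this form
		                               // is expected to fuse to VPTEST a, b
	}
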
@@ -15,3 +15,131 @@ package simd
 //
 // Asm: VZEROUPPER, CPU Feature: AVX
 func ClearAVXUpperBits()
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int8x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int8x32) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int16x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int16x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int32x4) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int32x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int64x2) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int64x4) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint8x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint8x32) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint16x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint16x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint32x4) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint32x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint64x2) IsZero() bool
+
+// IsZero returns true if all elements of x are zero.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint64x4) IsZero() bool
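The fusion promised in these doc comments makes IsZero useful beyond a plain zero check: branch-free disjointness and subset tests fall out directly. A sketch (assuming AndNot computes Go's x &^ y, i.e. x AND NOT y):

	// disjoint reports whether x and y share no set bits (x AND y == 0);
	// per the doc comments this compiles to VPTEST x, y plus a flags read.
	func disjoint(x, y simd.Uint64x2) bool {
		return x.And(y).IsZero()
	}

	// subset reports whether every set bit of x is also set in y
	// (x AND NOT y == 0), expected to compile to a single VPTEST as well.
	func subset(x, y simd.Uint64x2) bool {
		return x.AndNot(y).IsZero()
	}
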
@@ -557,3 +557,26 @@ func TestLeadingZeros(t *testing.T) {
 		}
 	}
 }
+
+func TestIsZero(t *testing.T) {
+	v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
+	v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+	if v1.IsZero() {
+		t.Errorf("Result incorrect, want false, got true")
+	}
+	if !v2.IsZero() {
+		t.Errorf("Result incorrect, want true, got false")
+	}
+	if !v1.And(v2).IsZero() {
+		t.Errorf("Result incorrect, want true, got false")
+	}
+	if v1.AndNot(v2).IsZero() {
+		t.Errorf("Result incorrect, want false, got true")
+	}
+	if !v2.And(v1).IsZero() {
+		t.Errorf("Result incorrect, want true, got false")
+	}
+	if !v2.AndNot(v1).IsZero() {
+		t.Errorf("Result incorrect, want true, got false")
+	}
+}