From a034826e263c31d2e7e34944f4849d1996f9d901 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 18 Aug 2025 19:35:53 +0000 Subject: [PATCH] [dev.simd] simd, cmd/compile: implement ToMask, unexport asMask. This CL defines the mask semantics more precisely: when converting from a vector to a mask, each mask element is set to true iff the corresponding vector element is nonzero. Change-Id: I331c1c7992dc9e81c211bdc6d73e5eb3b8414506 Reviewed-on: https://go-review.googlesource.com/c/go/+/697056 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- .../compile/internal/ssagen/simdintrinsics.go | 24 +-- src/simd/_gen/simdgen/gen_simdIntrinsics.go | 2 +- src/simd/_gen/simdgen/gen_simdTypes.go | 6 +- src/simd/compare_gen_amd64.go | 96 +++++------ src/simd/comparemasked_helpers_test.go | 60 +++---- src/simd/genfiles.go | 26 +-- src/simd/ops_amd64.go | 72 ++++----- src/simd/other_gen_amd64.go | 150 ++++++++++++++++++ src/simd/simd_test.go | 30 ++-- src/simd/slice_gen_amd64.go | 48 +++--- 10 files changed, 333 insertions(+), 181 deletions(-) diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index be3d917f8ff..90149300b2c 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -2299,7 +2299,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "LoadMaskedMask64x8", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64) addF(simdPackage, "Mask64x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64) addF(simdPackage, "Mask8x16.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int8x16.AsMask8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int8x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64) @@ -2307,7 +2307,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask8x16FromBits", simdCvtVToMask(8, 16), sys.AMD64) addF(simdPackage, "Mask8x16.ToBits", simdCvtMaskToV(8, 16), sys.AMD64) addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int8x32.AsMask8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int8x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64) @@ -2315,7 +2315,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Mask8x32FromBits", simdCvtVToMask(8, 32), sys.AMD64) addF(simdPackage, "Mask8x32.ToBits", simdCvtMaskToV(8, 32), sys.AMD64) addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int8x64.AsMask8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int8x64.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64) @@ -2323,7 +2323,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask8x64FromBits", simdCvtVToMask(8, 64), sys.AMD64) addF(simdPackage, "Mask8x64.ToBits", simdCvtMaskToV(8, 64), sys.AMD64) addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int16x8.AsMask16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int16x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64) @@ -2331,7 +2331,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Mask16x8FromBits", simdCvtVToMask(16, 8), sys.AMD64) addF(simdPackage, "Mask16x8.ToBits", simdCvtMaskToV(16, 8), sys.AMD64) addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int16x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64) @@ -2339,7 +2339,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask16x16FromBits", simdCvtVToMask(16, 16), sys.AMD64) addF(simdPackage, "Mask16x16.ToBits", simdCvtMaskToV(16, 16), sys.AMD64) addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int16x32.AsMask16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int16x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64) @@ -2347,7 +2347,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Mask16x32FromBits", simdCvtVToMask(16, 32), sys.AMD64) addF(simdPackage, "Mask16x32.ToBits", simdCvtMaskToV(16, 32), sys.AMD64) addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int32x4.AsMask32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int32x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64) @@ -2355,7 +2355,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask32x4FromBits", simdCvtVToMask(32, 4), sys.AMD64) addF(simdPackage, "Mask32x4.ToBits", simdCvtMaskToV(32, 4), sys.AMD64) addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int32x8.AsMask32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int32x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64) @@ -2363,7 +2363,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Mask32x8FromBits", simdCvtVToMask(32, 8), sys.AMD64) addF(simdPackage, "Mask32x8.ToBits", simdCvtMaskToV(32, 8), sys.AMD64) addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int32x16.AsMask32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int32x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64) @@ -2371,7 +2371,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask32x16FromBits", simdCvtVToMask(32, 16), sys.AMD64) addF(simdPackage, "Mask32x16.ToBits", simdCvtMaskToV(32, 16), sys.AMD64) addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int64x2.AsMask64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int64x2.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64) @@ -2379,7 +2379,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Mask64x2FromBits", simdCvtVToMask(64, 2), sys.AMD64) addF(simdPackage, "Mask64x2.ToBits", simdCvtMaskToV(64, 2), sys.AMD64) addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int64x4.AsMask64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int64x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64) @@ -2387,7 +2387,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask64x4FromBits", simdCvtVToMask(64, 4), sys.AMD64) addF(simdPackage, "Mask64x4.ToBits", simdCvtMaskToV(64, 4), sys.AMD64) addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "Int64x8.AsMask64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "Int64x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64) diff --git a/src/simd/_gen/simdgen/gen_simdIntrinsics.go b/src/simd/_gen/simdgen/gen_simdIntrinsics.go index 6a1501e17bf..353bc46b317 100644 --- a/src/simd/_gen/simdgen/gen_simdIntrinsics.go +++ b/src/simd/_gen/simdgen/gen_simdIntrinsics.go @@ -75,7 +75,7 @@ func simdIntrinsics(addF func(pkg, fn string, b 
intrinsicBuilder, archFamilies . {{end}} {{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.VectorCounterpart}}.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) diff --git a/src/simd/_gen/simdgen/gen_simdTypes.go b/src/simd/_gen/simdgen/gen_simdTypes.go index a367cce0144..22d19be0e2a 100644 --- a/src/simd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/_gen/simdgen/gen_simdTypes.go @@ -389,11 +389,11 @@ func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) {{end}} {{define "mask"}} -// converts from {{.Name}} to {{.VectorCounterpart}} +// As{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}} func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) -// converts from {{.VectorCounterpart}} to {{.Name}} -func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) +// asMask converts from {{.VectorCounterpart}} to {{.Name}} +func (from {{.VectorCounterpart}}) asMask() (to {{.Name}}) func (x {{.Name}}) And(y {{.Name}}) {{.Name}} diff --git a/src/simd/compare_gen_amd64.go b/src/simd/compare_gen_amd64.go index 65919fe4031..01e4f842118 100644 --- a/src/simd/compare_gen_amd64.go +++ b/src/simd/compare_gen_amd64.go @@ -16,7 +16,7 @@ func (x Int8x16) Less(y Int8x16) Mask8x16 { // Emulated, CPU Feature AVX func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 { ones := 
x.Equal(x).AsInt8x16() - return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16() + return y.Greater(x).AsInt8x16().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -24,7 +24,7 @@ func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 { // Emulated, CPU Feature AVX func (x Int8x16) LessEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).AsInt8x16() - return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16() + return x.Greater(y).AsInt8x16().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -32,7 +32,7 @@ func (x Int8x16) LessEqual(y Int8x16) Mask8x16 { // Emulated, CPU Feature AVX func (x Int8x16) NotEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).AsInt8x16() - return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16() + return x.Equal(y).AsInt8x16().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -47,7 +47,7 @@ func (x Int16x8) Less(y Int16x8) Mask16x8 { // Emulated, CPU Feature AVX func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).AsInt16x8() - return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8() + return y.Greater(x).AsInt16x8().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -55,7 +55,7 @@ func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 { // Emulated, CPU Feature AVX func (x Int16x8) LessEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).AsInt16x8() - return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8() + return x.Greater(y).AsInt16x8().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -63,7 +63,7 @@ func (x Int16x8) LessEqual(y Int16x8) Mask16x8 { // Emulated, CPU Feature AVX func (x Int16x8) NotEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).AsInt16x8() - return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8() + return x.Equal(y).AsInt16x8().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -78,7 +78,7 @@ func (x Int32x4) Less(y 
Int32x4) Mask32x4 { // Emulated, CPU Feature AVX func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).AsInt32x4() - return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4() + return y.Greater(x).AsInt32x4().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -86,7 +86,7 @@ func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 { // Emulated, CPU Feature AVX func (x Int32x4) LessEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).AsInt32x4() - return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4() + return x.Greater(y).AsInt32x4().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -94,7 +94,7 @@ func (x Int32x4) LessEqual(y Int32x4) Mask32x4 { // Emulated, CPU Feature AVX func (x Int32x4) NotEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).AsInt32x4() - return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4() + return x.Equal(y).AsInt32x4().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -109,7 +109,7 @@ func (x Int64x2) Less(y Int64x2) Mask64x2 { // Emulated, CPU Feature AVX func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).AsInt64x2() - return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2() + return y.Greater(x).AsInt64x2().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -117,7 +117,7 @@ func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 { // Emulated, CPU Feature AVX func (x Int64x2) LessEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).AsInt64x2() - return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2() + return x.Greater(y).AsInt64x2().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -125,7 +125,7 @@ func (x Int64x2) LessEqual(y Int64x2) Mask64x2 { // Emulated, CPU Feature AVX func (x Int64x2) NotEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).AsInt64x2() - return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2() + return 
x.Equal(y).AsInt64x2().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -140,7 +140,7 @@ func (x Int8x32) Less(y Int8x32) Mask8x32 { // Emulated, CPU Feature AVX2 func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).AsInt8x32() - return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32() + return y.Greater(x).AsInt8x32().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -148,7 +148,7 @@ func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 { // Emulated, CPU Feature AVX2 func (x Int8x32) LessEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).AsInt8x32() - return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32() + return x.Greater(y).AsInt8x32().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -156,7 +156,7 @@ func (x Int8x32) LessEqual(y Int8x32) Mask8x32 { // Emulated, CPU Feature AVX2 func (x Int8x32) NotEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).AsInt8x32() - return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32() + return x.Equal(y).AsInt8x32().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -171,7 +171,7 @@ func (x Int16x16) Less(y Int16x16) Mask16x16 { // Emulated, CPU Feature AVX2 func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).AsInt16x16() - return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16() + return y.Greater(x).AsInt16x16().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -179,7 +179,7 @@ func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 { // Emulated, CPU Feature AVX2 func (x Int16x16) LessEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).AsInt16x16() - return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16() + return x.Greater(y).AsInt16x16().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -187,7 +187,7 @@ func (x Int16x16) LessEqual(y Int16x16) Mask16x16 { // Emulated, CPU 
Feature AVX2 func (x Int16x16) NotEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).AsInt16x16() - return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16() + return x.Equal(y).AsInt16x16().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -202,7 +202,7 @@ func (x Int32x8) Less(y Int32x8) Mask32x8 { // Emulated, CPU Feature AVX2 func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).AsInt32x8() - return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8() + return y.Greater(x).AsInt32x8().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -210,7 +210,7 @@ func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 { // Emulated, CPU Feature AVX2 func (x Int32x8) LessEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).AsInt32x8() - return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8() + return x.Greater(y).AsInt32x8().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -218,7 +218,7 @@ func (x Int32x8) LessEqual(y Int32x8) Mask32x8 { // Emulated, CPU Feature AVX2 func (x Int32x8) NotEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).AsInt32x8() - return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8() + return x.Equal(y).AsInt32x8().Xor(ones).asMask() } // Less returns a mask whose elements indicate whether x < y @@ -233,7 +233,7 @@ func (x Int64x4) Less(y Int64x4) Mask64x4 { // Emulated, CPU Feature AVX2 func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).AsInt64x4() - return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4() + return y.Greater(x).AsInt64x4().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -241,7 +241,7 @@ func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 { // Emulated, CPU Feature AVX2 func (x Int64x4) LessEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).AsInt64x4() - return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4() + return x.Greater(y).AsInt64x4().Xor(ones).asMask() } // NotEqual 
returns a mask whose elements indicate whether x != y @@ -249,7 +249,7 @@ func (x Int64x4) LessEqual(y Int64x4) Mask64x4 { // Emulated, CPU Feature AVX2 func (x Int64x4) NotEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).AsInt64x4() - return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4() + return x.Equal(y).AsInt64x4().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -277,7 +277,7 @@ func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).AsInt8x16() signs := BroadcastInt8x16(-1 << (8 - 1)) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -287,7 +287,7 @@ func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).AsInt8x16() signs := BroadcastInt8x16(-1 << (8 - 1)) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -296,7 +296,7 @@ func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).AsInt8x16() - return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16() + return a.Equal(b).AsInt8x16().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -326,7 +326,7 @@ func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).AsInt16x8() signs := ones.ShiftAllLeft(16 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -336,7 +336,7 @@ func 
(x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).AsInt16x8() signs := ones.ShiftAllLeft(16 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -345,7 +345,7 @@ func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).AsInt16x8() - return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8() + return a.Equal(b).AsInt16x8().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -375,7 +375,7 @@ func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).AsInt32x4() signs := ones.ShiftAllLeft(32 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -385,7 +385,7 @@ func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).AsInt32x4() signs := ones.ShiftAllLeft(32 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -394,7 +394,7 @@ func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).AsInt32x4() - return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4() + return a.Equal(b).AsInt32x4().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -424,7 +424,7 @@ func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() 
ones := x.Equal(x).AsInt64x2() signs := ones.ShiftAllLeft(64 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -434,7 +434,7 @@ func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).AsInt64x2() signs := ones.ShiftAllLeft(64 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -443,7 +443,7 @@ func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).AsInt64x2() - return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2() + return a.Equal(b).AsInt64x2().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -471,7 +471,7 @@ func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).AsInt8x32() signs := BroadcastInt8x32(-1 << (8 - 1)) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -481,7 +481,7 @@ func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).AsInt8x32() signs := BroadcastInt8x32(-1 << (8 - 1)) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -490,7 +490,7 @@ func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), 
y.AsInt8x32() ones := x.Equal(x).AsInt8x32() - return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32() + return a.Equal(b).AsInt8x32().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -520,7 +520,7 @@ func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).AsInt16x16() signs := ones.ShiftAllLeft(16 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -530,7 +530,7 @@ func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).AsInt16x16() signs := ones.ShiftAllLeft(16 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -539,7 +539,7 @@ func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).AsInt16x16() - return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16() + return a.Equal(b).AsInt16x16().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -569,7 +569,7 @@ func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).AsInt32x8() signs := ones.ShiftAllLeft(32 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -579,7 +579,7 @@ func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).AsInt32x8() signs := ones.ShiftAllLeft(32 - 1) - return 
a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -588,7 +588,7 @@ func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).AsInt32x8() - return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8() + return a.Equal(b).AsInt32x8().Xor(ones).asMask() } // Greater returns a mask whose elements indicate whether x > y @@ -618,7 +618,7 @@ func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).AsInt64x4() signs := ones.ShiftAllLeft(64 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -628,7 +628,7 @@ func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).AsInt64x4() signs := ones.ShiftAllLeft(64 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -637,5 +637,5 @@ func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).AsInt64x4() - return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4() + return a.Equal(b).AsInt64x4().Xor(ones).asMask() } diff --git a/src/simd/comparemasked_helpers_test.go b/src/simd/comparemasked_helpers_test.go index 542145c11e1..4c05d10bb32 100644 --- a/src/simd/comparemasked_helpers_test.go +++ b/src/simd/comparemasked_helpers_test.go @@ -24,7 +24,7 @@ func testInt8x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt8x16Slice(x) b := 
simd.LoadInt8x16Slice(y) - k := simd.LoadInt8x16Slice(toVect[int8](m)).AsMask8x16() + k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x16().StoreSlice(g) w := want(x, y) @@ -48,7 +48,7 @@ func testInt16x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt16x8Slice(x) b := simd.LoadInt16x8Slice(y) - k := simd.LoadInt16x8Slice(toVect[int16](m)).AsMask16x8() + k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x8().StoreSlice(g) w := want(x, y) @@ -72,7 +72,7 @@ func testInt32x4CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt32x4Slice(x) b := simd.LoadInt32x4Slice(y) - k := simd.LoadInt32x4Slice(toVect[int32](m)).AsMask32x4() + k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -96,7 +96,7 @@ func testInt64x2CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt64x2Slice(x) b := simd.LoadInt64x2Slice(y) - k := simd.LoadInt64x2Slice(toVect[int64](m)).AsMask64x2() + k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -120,7 +120,7 @@ func testUint8x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint8x16Slice(x) b := simd.LoadUint8x16Slice(y) - k := simd.LoadInt8x16Slice(toVect[int8](m)).AsMask8x16() + k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x16().StoreSlice(g) w := want(x, y) @@ -144,7 +144,7 @@ func testUint16x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint16x8Slice(x) b := simd.LoadUint16x8Slice(y) - k := simd.LoadInt16x8Slice(toVect[int16](m)).AsMask16x8() + k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x8().StoreSlice(g) w := want(x, y) @@ -168,7 +168,7 @@ func testUint32x4CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint32x4Slice(x) b := simd.LoadUint32x4Slice(y) - k := 
simd.LoadInt32x4Slice(toVect[int32](m)).AsMask32x4() + k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -192,7 +192,7 @@ func testUint64x2CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint64x2Slice(x) b := simd.LoadUint64x2Slice(y) - k := simd.LoadInt64x2Slice(toVect[int64](m)).AsMask64x2() + k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -216,7 +216,7 @@ func testFloat32x4CompareMasked(t *testing.T, t.Helper() a := simd.LoadFloat32x4Slice(x) b := simd.LoadFloat32x4Slice(y) - k := simd.LoadInt32x4Slice(toVect[int32](m)).AsMask32x4() + k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -240,7 +240,7 @@ func testFloat64x2CompareMasked(t *testing.T, t.Helper() a := simd.LoadFloat64x2Slice(x) b := simd.LoadFloat64x2Slice(y) - k := simd.LoadInt64x2Slice(toVect[int64](m)).AsMask64x2() + k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -264,7 +264,7 @@ func testInt8x32CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt8x32Slice(x) b := simd.LoadInt8x32Slice(y) - k := simd.LoadInt8x32Slice(toVect[int8](m)).AsMask8x32() + k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x32().StoreSlice(g) w := want(x, y) @@ -288,7 +288,7 @@ func testInt16x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt16x16Slice(x) b := simd.LoadInt16x16Slice(y) - k := simd.LoadInt16x16Slice(toVect[int16](m)).AsMask16x16() + k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x16().StoreSlice(g) w := want(x, y) @@ -312,7 +312,7 @@ func testInt32x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt32x8Slice(x) b := simd.LoadInt32x8Slice(y) - k := 
simd.LoadInt32x8Slice(toVect[int32](m)).AsMask32x8() + k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -336,7 +336,7 @@ func testInt64x4CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt64x4Slice(x) b := simd.LoadInt64x4Slice(y) - k := simd.LoadInt64x4Slice(toVect[int64](m)).AsMask64x4() + k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -360,7 +360,7 @@ func testUint8x32CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint8x32Slice(x) b := simd.LoadUint8x32Slice(y) - k := simd.LoadInt8x32Slice(toVect[int8](m)).AsMask8x32() + k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x32().StoreSlice(g) w := want(x, y) @@ -384,7 +384,7 @@ func testUint16x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint16x16Slice(x) b := simd.LoadUint16x16Slice(y) - k := simd.LoadInt16x16Slice(toVect[int16](m)).AsMask16x16() + k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x16().StoreSlice(g) w := want(x, y) @@ -408,7 +408,7 @@ func testUint32x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint32x8Slice(x) b := simd.LoadUint32x8Slice(y) - k := simd.LoadInt32x8Slice(toVect[int32](m)).AsMask32x8() + k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -432,7 +432,7 @@ func testUint64x4CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint64x4Slice(x) b := simd.LoadUint64x4Slice(y) - k := simd.LoadInt64x4Slice(toVect[int64](m)).AsMask64x4() + k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -456,7 +456,7 @@ func testFloat32x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadFloat32x8Slice(x) b := simd.LoadFloat32x8Slice(y) - k := 
simd.LoadInt32x8Slice(toVect[int32](m)).AsMask32x8() + k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -480,7 +480,7 @@ func testFloat64x4CompareMasked(t *testing.T, t.Helper() a := simd.LoadFloat64x4Slice(x) b := simd.LoadFloat64x4Slice(y) - k := simd.LoadInt64x4Slice(toVect[int64](m)).AsMask64x4() + k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -504,7 +504,7 @@ func testInt8x64CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt8x64Slice(x) b := simd.LoadInt8x64Slice(y) - k := simd.LoadInt8x64Slice(toVect[int8](m)).AsMask8x64() + k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x64().StoreSlice(g) w := want(x, y) @@ -528,7 +528,7 @@ func testInt16x32CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt16x32Slice(x) b := simd.LoadInt16x32Slice(y) - k := simd.LoadInt16x32Slice(toVect[int16](m)).AsMask16x32() + k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x32().StoreSlice(g) w := want(x, y) @@ -552,7 +552,7 @@ func testInt32x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt32x16Slice(x) b := simd.LoadInt32x16Slice(y) - k := simd.LoadInt32x16Slice(toVect[int32](m)).AsMask32x16() + k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -576,7 +576,7 @@ func testInt64x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadInt64x8Slice(x) b := simd.LoadInt64x8Slice(y) - k := simd.LoadInt64x8Slice(toVect[int64](m)).AsMask64x8() + k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x8().StoreSlice(g) w := want(x, y) @@ -600,7 +600,7 @@ func testUint8x64CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint8x64Slice(x) b := simd.LoadUint8x64Slice(y) - k := 
simd.LoadInt8x64Slice(toVect[int8](m)).AsMask8x64() + k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x64().StoreSlice(g) w := want(x, y) @@ -624,7 +624,7 @@ func testUint16x32CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint16x32Slice(x) b := simd.LoadUint16x32Slice(y) - k := simd.LoadInt16x32Slice(toVect[int16](m)).AsMask16x32() + k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x32().StoreSlice(g) w := want(x, y) @@ -648,7 +648,7 @@ func testUint32x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint32x16Slice(x) b := simd.LoadUint32x16Slice(y) - k := simd.LoadInt32x16Slice(toVect[int32](m)).AsMask32x16() + k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -672,7 +672,7 @@ func testUint64x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadUint64x8Slice(x) b := simd.LoadUint64x8Slice(y) - k := simd.LoadInt64x8Slice(toVect[int64](m)).AsMask64x8() + k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x8().StoreSlice(g) w := want(x, y) @@ -696,7 +696,7 @@ func testFloat32x16CompareMasked(t *testing.T, t.Helper() a := simd.LoadFloat32x16Slice(x) b := simd.LoadFloat32x16Slice(y) - k := simd.LoadInt32x16Slice(toVect[int32](m)).AsMask32x16() + k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -720,7 +720,7 @@ func testFloat64x8CompareMasked(t *testing.T, t.Helper() a := simd.LoadFloat64x8Slice(x) b := simd.LoadFloat64x8Slice(y) - k := simd.LoadInt64x8Slice(toVect[int64](m)).AsMask64x8() + k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x8().StoreSlice(g) w := want(x, y) diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go index a1da5ad0561..be149ef637c 100644 --- a/src/simd/genfiles.go +++ 
b/src/simd/genfiles.go @@ -387,7 +387,7 @@ func test{{.Vec}}CompareMasked(t *testing.T, t.Helper() a := simd.Load{{.Vec}}Slice(x) b := simd.Load{{.Vec}}Slice(y) - k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.Width}}](m)).AsMask{{.WxC}}() + k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.Width}}](m)).ToMask() g := make([]int{{.Width}}, n) f(a, b, k).AsInt{{.WxC}}().StoreSlice(g) w := want(x, y) @@ -449,7 +449,7 @@ func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} { return x } mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:] - return LoadMasked{{.Vec}}(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).AsMask{{.WxC}}()) + return LoadMasked{{.Vec}}(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).asMask()) } // StoreSlicePart stores the {{.Count}} elements of x into the slice s. @@ -465,7 +465,7 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) { return } mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:] - x.StoreMasked(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).AsMask{{.WxC}}()) + x.StoreMasked(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).asMask()) } `) @@ -519,7 +519,7 @@ func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} { // Emulated, CPU Feature {{.CPUfeature}} func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { ones := x.Equal(x).AsInt{{.WxC}}() - return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() + return y.Greater(x).AsInt{{.WxC}}().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -527,7 +527,7 @@ func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { // Emulated, CPU Feature {{.CPUfeature}} func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { ones := x.Equal(x).AsInt{{.WxC}}() - return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() + return x.Greater(y).AsInt{{.WxC}}().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -535,7 +535,7 @@ func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { // Emulated, CPU Feature {{.CPUfeature}} func (x {{.Vec}}) NotEqual(y 
{{.Vec}}) Mask{{.WxC}} { ones := x.Equal(x).AsInt{{.WxC}}() - return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() + return x.Equal(y).AsInt{{.WxC}}().Xor(ones).asMask() } `) @@ -591,7 +591,7 @@ func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { {{- else}} signs := ones.ShiftAllLeft({{.Width}}-1) {{- end }} - return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() + return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask() } // LessEqual returns a mask whose elements indicate whether x <= y @@ -605,7 +605,7 @@ func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { {{- else}} signs := ones.ShiftAllLeft({{.Width}}-1) {{- end }} - return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() + return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask() } // NotEqual returns a mask whose elements indicate whether x != y @@ -614,7 +614,7 @@ func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}() - return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() + return a.Equal(b).AsInt{{.WxC}}().Xor(ones).asMask() } `) @@ -705,6 +705,13 @@ func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} { } `) +var maskCvtTemplate = templateOf("Mask conversions", ` +// ToMask converts from {{.Base}}{{.WxC}} to Mask{{.WxC}}, mask element is set to true when the corresponding vector element is non-zero. 
+func (from {{.Base}}{{.WxC}}) ToMask() (to Mask{{.WxC}}) { + return from.NotEqual({{.Base}}{{.WxC}}{}) +} +`) + func main() { sl := flag.String("sl", "slice_gen_amd64.go", "file name for slice operations") cm := flag.String("cm", "compare_gen_amd64.go", "file name for comparison operations") @@ -741,6 +748,7 @@ func main() { if *op != "" { one(*op, prologue, broadcastTemplate, + maskCvtTemplate, ) } if *ush != "" { diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 8da3cd18175..d6fcd065bbb 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -13488,121 +13488,121 @@ func (from Uint64x8) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Uint64x8 to Uint32x16 func (from Uint64x8) AsUint32x16() (to Uint32x16) -// converts from Mask8x16 to Int8x16 +// AsInt8x16 converts from Mask8x16 to Int8x16 func (from Mask8x16) AsInt8x16() (to Int8x16) -// converts from Int8x16 to Mask8x16 -func (from Int8x16) AsMask8x16() (to Mask8x16) +// asMask converts from Int8x16 to Mask8x16 +func (from Int8x16) asMask() (to Mask8x16) func (x Mask8x16) And(y Mask8x16) Mask8x16 func (x Mask8x16) Or(y Mask8x16) Mask8x16 -// converts from Mask8x32 to Int8x32 +// AsInt8x32 converts from Mask8x32 to Int8x32 func (from Mask8x32) AsInt8x32() (to Int8x32) -// converts from Int8x32 to Mask8x32 -func (from Int8x32) AsMask8x32() (to Mask8x32) +// asMask converts from Int8x32 to Mask8x32 +func (from Int8x32) asMask() (to Mask8x32) func (x Mask8x32) And(y Mask8x32) Mask8x32 func (x Mask8x32) Or(y Mask8x32) Mask8x32 -// converts from Mask8x64 to Int8x64 +// AsInt8x64 converts from Mask8x64 to Int8x64 func (from Mask8x64) AsInt8x64() (to Int8x64) -// converts from Int8x64 to Mask8x64 -func (from Int8x64) AsMask8x64() (to Mask8x64) +// asMask converts from Int8x64 to Mask8x64 +func (from Int8x64) asMask() (to Mask8x64) func (x Mask8x64) And(y Mask8x64) Mask8x64 func (x Mask8x64) Or(y Mask8x64) Mask8x64 -// converts from Mask16x8 to Int16x8 +// AsInt16x8 converts from 
Mask16x8 to Int16x8 func (from Mask16x8) AsInt16x8() (to Int16x8) -// converts from Int16x8 to Mask16x8 -func (from Int16x8) AsMask16x8() (to Mask16x8) +// asMask converts from Int16x8 to Mask16x8 +func (from Int16x8) asMask() (to Mask16x8) func (x Mask16x8) And(y Mask16x8) Mask16x8 func (x Mask16x8) Or(y Mask16x8) Mask16x8 -// converts from Mask16x16 to Int16x16 +// AsInt16x16 converts from Mask16x16 to Int16x16 func (from Mask16x16) AsInt16x16() (to Int16x16) -// converts from Int16x16 to Mask16x16 -func (from Int16x16) AsMask16x16() (to Mask16x16) +// asMask converts from Int16x16 to Mask16x16 +func (from Int16x16) asMask() (to Mask16x16) func (x Mask16x16) And(y Mask16x16) Mask16x16 func (x Mask16x16) Or(y Mask16x16) Mask16x16 -// converts from Mask16x32 to Int16x32 +// AsInt16x32 converts from Mask16x32 to Int16x32 func (from Mask16x32) AsInt16x32() (to Int16x32) -// converts from Int16x32 to Mask16x32 -func (from Int16x32) AsMask16x32() (to Mask16x32) +// asMask converts from Int16x32 to Mask16x32 +func (from Int16x32) asMask() (to Mask16x32) func (x Mask16x32) And(y Mask16x32) Mask16x32 func (x Mask16x32) Or(y Mask16x32) Mask16x32 -// converts from Mask32x4 to Int32x4 +// AsInt32x4 converts from Mask32x4 to Int32x4 func (from Mask32x4) AsInt32x4() (to Int32x4) -// converts from Int32x4 to Mask32x4 -func (from Int32x4) AsMask32x4() (to Mask32x4) +// asMask converts from Int32x4 to Mask32x4 +func (from Int32x4) asMask() (to Mask32x4) func (x Mask32x4) And(y Mask32x4) Mask32x4 func (x Mask32x4) Or(y Mask32x4) Mask32x4 -// converts from Mask32x8 to Int32x8 +// AsInt32x8 converts from Mask32x8 to Int32x8 func (from Mask32x8) AsInt32x8() (to Int32x8) -// converts from Int32x8 to Mask32x8 -func (from Int32x8) AsMask32x8() (to Mask32x8) +// asMask converts from Int32x8 to Mask32x8 +func (from Int32x8) asMask() (to Mask32x8) func (x Mask32x8) And(y Mask32x8) Mask32x8 func (x Mask32x8) Or(y Mask32x8) Mask32x8 -// converts from Mask32x16 to Int32x16 +// AsInt32x16 
converts from Mask32x16 to Int32x16 func (from Mask32x16) AsInt32x16() (to Int32x16) -// converts from Int32x16 to Mask32x16 -func (from Int32x16) AsMask32x16() (to Mask32x16) +// asMask converts from Int32x16 to Mask32x16 +func (from Int32x16) asMask() (to Mask32x16) func (x Mask32x16) And(y Mask32x16) Mask32x16 func (x Mask32x16) Or(y Mask32x16) Mask32x16 -// converts from Mask64x2 to Int64x2 +// AsInt64x2 converts from Mask64x2 to Int64x2 func (from Mask64x2) AsInt64x2() (to Int64x2) -// converts from Int64x2 to Mask64x2 -func (from Int64x2) AsMask64x2() (to Mask64x2) +// asMask converts from Int64x2 to Mask64x2 +func (from Int64x2) asMask() (to Mask64x2) func (x Mask64x2) And(y Mask64x2) Mask64x2 func (x Mask64x2) Or(y Mask64x2) Mask64x2 -// converts from Mask64x4 to Int64x4 +// AsInt64x4 converts from Mask64x4 to Int64x4 func (from Mask64x4) AsInt64x4() (to Int64x4) -// converts from Int64x4 to Mask64x4 -func (from Int64x4) AsMask64x4() (to Mask64x4) +// asMask converts from Int64x4 to Mask64x4 +func (from Int64x4) asMask() (to Mask64x4) func (x Mask64x4) And(y Mask64x4) Mask64x4 func (x Mask64x4) Or(y Mask64x4) Mask64x4 -// converts from Mask64x8 to Int64x8 +// AsInt64x8 converts from Mask64x8 to Int64x8 func (from Mask64x8) AsInt64x8() (to Int64x8) -// converts from Int64x8 to Mask64x8 -func (from Int64x8) AsMask64x8() (to Mask64x8) +// asMask converts from Int64x8 to Mask64x8 +func (from Int64x8) asMask() (to Mask64x8) func (x Mask64x8) And(y Mask64x8) Mask64x8 diff --git a/src/simd/other_gen_amd64.go b/src/simd/other_gen_amd64.go index ed9394cf7d3..4a9049a2b90 100644 --- a/src/simd/other_gen_amd64.go +++ b/src/simd/other_gen_amd64.go @@ -273,3 +273,153 @@ func BroadcastFloat64x8(x float64) Float64x8 { var z Float64x2 return z.SetElem(0, x).Broadcast512() } + +// ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero. 
+func (from Int8x16) ToMask() (to Mask8x16) { + return from.NotEqual(Int8x16{}) +} + +// ToMask converts from Int16x8 to Mask16x8, mask element is set to true when the corresponding vector element is non-zero. +func (from Int16x8) ToMask() (to Mask16x8) { + return from.NotEqual(Int16x8{}) +} + +// ToMask converts from Int32x4 to Mask32x4, mask element is set to true when the corresponding vector element is non-zero. +func (from Int32x4) ToMask() (to Mask32x4) { + return from.NotEqual(Int32x4{}) +} + +// ToMask converts from Int64x2 to Mask64x2, mask element is set to true when the corresponding vector element is non-zero. +func (from Int64x2) ToMask() (to Mask64x2) { + return from.NotEqual(Int64x2{}) +} + +// ToMask converts from Uint8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint8x16) ToMask() (to Mask8x16) { + return from.NotEqual(Uint8x16{}) +} + +// ToMask converts from Uint16x8 to Mask16x8, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint16x8) ToMask() (to Mask16x8) { + return from.NotEqual(Uint16x8{}) +} + +// ToMask converts from Uint32x4 to Mask32x4, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint32x4) ToMask() (to Mask32x4) { + return from.NotEqual(Uint32x4{}) +} + +// ToMask converts from Uint64x2 to Mask64x2, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint64x2) ToMask() (to Mask64x2) { + return from.NotEqual(Uint64x2{}) +} + +// ToMask converts from Float32x4 to Mask32x4, mask element is set to true when the corresponding vector element is non-zero. +func (from Float32x4) ToMask() (to Mask32x4) { + return from.NotEqual(Float32x4{}) +} + +// ToMask converts from Float64x2 to Mask64x2, mask element is set to true when the corresponding vector element is non-zero. 
+func (from Float64x2) ToMask() (to Mask64x2) { + return from.NotEqual(Float64x2{}) +} + +// ToMask converts from Int8x32 to Mask8x32, mask element is set to true when the corresponding vector element is non-zero. +func (from Int8x32) ToMask() (to Mask8x32) { + return from.NotEqual(Int8x32{}) +} + +// ToMask converts from Int16x16 to Mask16x16, mask element is set to true when the corresponding vector element is non-zero. +func (from Int16x16) ToMask() (to Mask16x16) { + return from.NotEqual(Int16x16{}) +} + +// ToMask converts from Int32x8 to Mask32x8, mask element is set to true when the corresponding vector element is non-zero. +func (from Int32x8) ToMask() (to Mask32x8) { + return from.NotEqual(Int32x8{}) +} + +// ToMask converts from Int64x4 to Mask64x4, mask element is set to true when the corresponding vector element is non-zero. +func (from Int64x4) ToMask() (to Mask64x4) { + return from.NotEqual(Int64x4{}) +} + +// ToMask converts from Uint8x32 to Mask8x32, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint8x32) ToMask() (to Mask8x32) { + return from.NotEqual(Uint8x32{}) +} + +// ToMask converts from Uint16x16 to Mask16x16, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint16x16) ToMask() (to Mask16x16) { + return from.NotEqual(Uint16x16{}) +} + +// ToMask converts from Uint32x8 to Mask32x8, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint32x8) ToMask() (to Mask32x8) { + return from.NotEqual(Uint32x8{}) +} + +// ToMask converts from Uint64x4 to Mask64x4, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint64x4) ToMask() (to Mask64x4) { + return from.NotEqual(Uint64x4{}) +} + +// ToMask converts from Float32x8 to Mask32x8, mask element is set to true when the corresponding vector element is non-zero. 
+func (from Float32x8) ToMask() (to Mask32x8) { + return from.NotEqual(Float32x8{}) +} + +// ToMask converts from Float64x4 to Mask64x4, mask element is set to true when the corresponding vector element is non-zero. +func (from Float64x4) ToMask() (to Mask64x4) { + return from.NotEqual(Float64x4{}) +} + +// ToMask converts from Int8x64 to Mask8x64, mask element is set to true when the corresponding vector element is non-zero. +func (from Int8x64) ToMask() (to Mask8x64) { + return from.NotEqual(Int8x64{}) +} + +// ToMask converts from Int16x32 to Mask16x32, mask element is set to true when the corresponding vector element is non-zero. +func (from Int16x32) ToMask() (to Mask16x32) { + return from.NotEqual(Int16x32{}) +} + +// ToMask converts from Int32x16 to Mask32x16, mask element is set to true when the corresponding vector element is non-zero. +func (from Int32x16) ToMask() (to Mask32x16) { + return from.NotEqual(Int32x16{}) +} + +// ToMask converts from Int64x8 to Mask64x8, mask element is set to true when the corresponding vector element is non-zero. +func (from Int64x8) ToMask() (to Mask64x8) { + return from.NotEqual(Int64x8{}) +} + +// ToMask converts from Uint8x64 to Mask8x64, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint8x64) ToMask() (to Mask8x64) { + return from.NotEqual(Uint8x64{}) +} + +// ToMask converts from Uint16x32 to Mask16x32, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint16x32) ToMask() (to Mask16x32) { + return from.NotEqual(Uint16x32{}) +} + +// ToMask converts from Uint32x16 to Mask32x16, mask element is set to true when the corresponding vector element is non-zero. +func (from Uint32x16) ToMask() (to Mask32x16) { + return from.NotEqual(Uint32x16{}) +} + +// ToMask converts from Uint64x8 to Mask64x8, mask element is set to true when the corresponding vector element is non-zero. 
+func (from Uint64x8) ToMask() (to Mask64x8) { + return from.NotEqual(Uint64x8{}) +} + +// ToMask converts from Float32x16 to Mask32x16, mask element is set to true when the corresponding vector element is non-zero. +func (from Float32x16) ToMask() (to Mask32x16) { + return from.NotEqual(Float32x16{}) +} + +// ToMask converts from Float64x8 to Mask64x8, mask element is set to true when the corresponding vector element is non-zero. +func (from Float64x8) ToMask() (to Mask64x8) { + return from.NotEqual(Float64x8{}) +} diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index ce982409ea9..3faeeaccfde 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -33,7 +33,6 @@ func TestType(t *testing.T) { vals := [4]int32{1, 2, 3, 4} v := myStruct{x: simd.LoadInt32x4(&vals)} // masking elements 1 and 2. - maskv := [4]int32{-1, -1, 0, 0} want := []int32{2, 4, 0, 0} y := simd.LoadInt32x4(&vals) v.y = &y @@ -43,7 +42,7 @@ func TestType(t *testing.T) { t.Skip("Test requires HasAVX512, not available on this hardware") return } - v.z = maskT(simd.LoadInt32x4(&maskv).AsMask32x4()) + v.z = maskT(simd.Mask32x4FromBits(0b0011)) *v.y = v.y.AddMasked(v.x, simd.Mask32x4(v.z)) got := [4]int32{} @@ -120,18 +119,15 @@ func TestMaskConversion(t *testing.T) { t.Skip("Test requires HasAVX512, not available on this hardware") return } - v := [4]int32{1, 0, 1, 0} - x := simd.LoadInt32x4(&v) - var y simd.Int32x4 - mask := y.Sub(x).AsMask32x4() - v = [4]int32{5, 6, 7, 8} - y = simd.LoadInt32x4(&v) - y = y.AddMasked(x, mask) - got := [4]int32{6, 0, 8, 0} - y.Store(&v) + x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) + mask := simd.Int32x4{}.Sub(x).ToMask() + y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).AddMasked(x, mask) + want := [4]int32{6, 0, 10, 0} + got := make([]int32, 4) + y.StoreSlice(got) for i := range 4 { - if v[i] != got[i] { - t.Errorf("Result at %d incorrect: want %d, got %d", i, v[i], got[i]) + if want[i] != got[i] { + t.Errorf("Result at %d incorrect: want %d, 
got %d", i, want[i], got[i]) } } } @@ -177,8 +173,7 @@ func TestCompress(t *testing.T) { return } v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) - v0101 := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1}) - v2400 := v1234.Compress(v0101.AsMask32x4()) + v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010)) got := make([]int32, 4) v2400.StoreSlice(got) want := []int32{2, 4, 0, 0} @@ -193,8 +188,7 @@ func TestExpand(t *testing.T) { return } v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0}) - v0101 := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1}) - v2400 := v3400.Expand(v0101.AsMask32x4()) + v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010)) got := make([]int32, 4) v2400.StoreSlice(got) want := []int32{0, 3, 0, 4} @@ -378,7 +372,7 @@ func TestBitMaskToBits(t *testing.T) { t.Skip("Test requires HasAVX512, not available on this hardware") return } - if v := simd.LoadInt16x8Slice([]int16{-1, 0, -1, 0, 0, 0, 0, 0}).AsMask16x8().ToBits(); v != 0b101 { + if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { t.Errorf("Want 0b101, got %b", v) } } diff --git a/src/simd/slice_gen_amd64.go b/src/simd/slice_gen_amd64.go index 45e95be9bf9..7d70cfb94d0 100644 --- a/src/simd/slice_gen_amd64.go +++ b/src/simd/slice_gen_amd64.go @@ -639,7 +639,7 @@ func LoadInt32x4SlicePart(s []int32) Int32x4 { return x } mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedInt32x4(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) + return LoadMaskedInt32x4(paInt32x4(s), LoadInt32x4Slice(mask).asMask()) } // StoreSlicePart stores the 4 elements of x into the slice s. @@ -655,7 +655,7 @@ func (x Int32x4) StoreSlicePart(s []int32) { return } mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) + x.StoreMasked(paInt32x4(s), LoadInt32x4Slice(mask).asMask()) } // LoadInt64x2SlicePart loads a Int64x2 from the slice s. 
@@ -671,7 +671,7 @@ func LoadInt64x2SlicePart(s []int64) Int64x2 { return x } mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedInt64x2(paInt64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) + return LoadMaskedInt64x2(paInt64x2(s), LoadInt64x2Slice(mask).asMask()) } // StoreSlicePart stores the 2 elements of x into the slice s. @@ -687,7 +687,7 @@ func (x Int64x2) StoreSlicePart(s []int64) { return } mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paInt64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) + x.StoreMasked(paInt64x2(s), LoadInt64x2Slice(mask).asMask()) } // LoadUint32x4SlicePart loads a Uint32x4 from the slice s. @@ -703,7 +703,7 @@ func LoadUint32x4SlicePart(s []uint32) Uint32x4 { return x } mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedUint32x4(paUint32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) + return LoadMaskedUint32x4(paUint32x4(s), LoadInt32x4Slice(mask).asMask()) } // StoreSlicePart stores the 4 elements of x into the slice s. @@ -719,7 +719,7 @@ func (x Uint32x4) StoreSlicePart(s []uint32) { return } mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paUint32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) + x.StoreMasked(paUint32x4(s), LoadInt32x4Slice(mask).asMask()) } // LoadUint64x2SlicePart loads a Uint64x2 from the slice s. @@ -735,7 +735,7 @@ func LoadUint64x2SlicePart(s []uint64) Uint64x2 { return x } mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedUint64x2(paUint64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) + return LoadMaskedUint64x2(paUint64x2(s), LoadInt64x2Slice(mask).asMask()) } // StoreSlicePart stores the 2 elements of x into the slice s. @@ -751,7 +751,7 @@ func (x Uint64x2) StoreSlicePart(s []uint64) { return } mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paUint64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) + x.StoreMasked(paUint64x2(s), LoadInt64x2Slice(mask).asMask()) } // LoadFloat32x4SlicePart loads a Float32x4 from the slice s. 
@@ -767,7 +767,7 @@ func LoadFloat32x4SlicePart(s []float32) Float32x4 { return x } mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedFloat32x4(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) + return LoadMaskedFloat32x4(paFloat32x4(s), LoadInt32x4Slice(mask).asMask()) } // StoreSlicePart stores the 4 elements of x into the slice s. @@ -783,7 +783,7 @@ func (x Float32x4) StoreSlicePart(s []float32) { return } mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) + x.StoreMasked(paFloat32x4(s), LoadInt32x4Slice(mask).asMask()) } // LoadFloat64x2SlicePart loads a Float64x2 from the slice s. @@ -799,7 +799,7 @@ func LoadFloat64x2SlicePart(s []float64) Float64x2 { return x } mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedFloat64x2(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) + return LoadMaskedFloat64x2(paFloat64x2(s), LoadInt64x2Slice(mask).asMask()) } // StoreSlicePart stores the 2 elements of x into the slice s. @@ -815,7 +815,7 @@ func (x Float64x2) StoreSlicePart(s []float64) { return } mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) + x.StoreMasked(paFloat64x2(s), LoadInt64x2Slice(mask).asMask()) } // LoadInt32x8SlicePart loads a Int32x8 from the slice s. @@ -831,7 +831,7 @@ func LoadInt32x8SlicePart(s []int32) Int32x8 { return x } mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedInt32x8(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) + return LoadMaskedInt32x8(paInt32x8(s), LoadInt32x8Slice(mask).asMask()) } // StoreSlicePart stores the 8 elements of x into the slice s. @@ -847,7 +847,7 @@ func (x Int32x8) StoreSlicePart(s []int32) { return } mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) + x.StoreMasked(paInt32x8(s), LoadInt32x8Slice(mask).asMask()) } // LoadInt64x4SlicePart loads a Int64x4 from the slice s. 
@@ -863,7 +863,7 @@ func LoadInt64x4SlicePart(s []int64) Int64x4 { return x } mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedInt64x4(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) + return LoadMaskedInt64x4(paInt64x4(s), LoadInt64x4Slice(mask).asMask()) } // StoreSlicePart stores the 4 elements of x into the slice s. @@ -879,7 +879,7 @@ func (x Int64x4) StoreSlicePart(s []int64) { return } mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) + x.StoreMasked(paInt64x4(s), LoadInt64x4Slice(mask).asMask()) } // LoadUint32x8SlicePart loads a Uint32x8 from the slice s. @@ -895,7 +895,7 @@ func LoadUint32x8SlicePart(s []uint32) Uint32x8 { return x } mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedUint32x8(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) + return LoadMaskedUint32x8(paUint32x8(s), LoadInt32x8Slice(mask).asMask()) } // StoreSlicePart stores the 8 elements of x into the slice s. @@ -911,7 +911,7 @@ func (x Uint32x8) StoreSlicePart(s []uint32) { return } mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) + x.StoreMasked(paUint32x8(s), LoadInt32x8Slice(mask).asMask()) } // LoadUint64x4SlicePart loads a Uint64x4 from the slice s. @@ -927,7 +927,7 @@ func LoadUint64x4SlicePart(s []uint64) Uint64x4 { return x } mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedUint64x4(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) + return LoadMaskedUint64x4(paUint64x4(s), LoadInt64x4Slice(mask).asMask()) } // StoreSlicePart stores the 4 elements of x into the slice s. @@ -943,7 +943,7 @@ func (x Uint64x4) StoreSlicePart(s []uint64) { return } mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) + x.StoreMasked(paUint64x4(s), LoadInt64x4Slice(mask).asMask()) } // LoadFloat32x8SlicePart loads a Float32x8 from the slice s. 
@@ -959,7 +959,7 @@ func LoadFloat32x8SlicePart(s []float32) Float32x8 { return x } mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedFloat32x8(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) + return LoadMaskedFloat32x8(paFloat32x8(s), LoadInt32x8Slice(mask).asMask()) } // StoreSlicePart stores the 8 elements of x into the slice s. @@ -975,7 +975,7 @@ func (x Float32x8) StoreSlicePart(s []float32) { return } mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) + x.StoreMasked(paFloat32x8(s), LoadInt32x8Slice(mask).asMask()) } // LoadFloat64x4SlicePart loads a Float64x4 from the slice s. @@ -991,7 +991,7 @@ func LoadFloat64x4SlicePart(s []float64) Float64x4 { return x } mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedFloat64x4(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) + return LoadMaskedFloat64x4(paFloat64x4(s), LoadInt64x4Slice(mask).asMask()) } // StoreSlicePart stores the 4 elements of x into the slice s. @@ -1007,7 +1007,7 @@ func (x Float64x4) StoreSlicePart(s []float64) { return } mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) + x.StoreMasked(paFloat64x4(s), LoadInt64x4Slice(mask).asMask()) } // LoadUint8x16SlicePart loads a Uint8x16 from the slice s.