go/src/simd/simd_test.go
Junyang Shao 0f660d675f [dev.simd] simd: make OpMasked machine ops only
Right now the `Op(...).Masked` idiom is still missing many pieces, which leaves
the API incomplete. To keep the API surface smaller, we are removing these ops'
frontend types and interfaces for now. Later, peephole optimizations and a new pass
that checks CPU-feature domination relations will select these ops
for the right `Op(...).Masked` idiom.

Change-Id: I77f72a198b3d8b1880dcb911470db5e0089ac1ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/697155
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Bypass: Junyang Shao <shaojunyang@google.com>
2025-08-19 13:46:58 -07:00

442 lines
12 KiB
Go

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.simd && amd64
package simd_test
import (
"reflect"
"simd"
"slices"
"testing"
)
var sink any
// TestType verifies that SIMD vector and mask types behave like ordinary
// Go types.
// Testing:
// - Defined as another struct's field is ok
// - Pointer is ok
// - Type defition is ok
// - Type alias is ok
// - Type conversion is ok
// - Conversion to interface is ok
func TestType(t *testing.T) {
	type alias = simd.Int32x4
	type maskT simd.Mask32x4
	type myStruct struct {
		x alias
		y *simd.Int32x4
		z maskT
	}
	vals := [4]int32{1, 2, 3, 4}
	v := myStruct{x: simd.LoadInt32x4(&vals)}
	// masking elements 1 and 2.
	want := []int32{2, 4, 0, 0}
	y := simd.LoadInt32x4(&vals)
	v.y = &y
	sink = y
	if !simd.HasAVX512GFNI() {
		// The guard is HasAVX512GFNI, so the skip message names that
		// feature (the previous message claimed plain HasAVX512).
		t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
		return
	}
	v.z = maskT(simd.Mask32x4FromBits(0b0011))
	*v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z))
	got := [4]int32{}
	v.y.Store(&got)
	for i := range 4 {
		if want[i] != got[i] {
			t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
		}
	}
}
// TestFuncValue checks that a SIMD intrinsic method can be captured and
// invoked as an ordinary function value.
func TestFuncValue(t *testing.T) {
	left := [4]int32{1, 2, 3, 4}
	right := [4]int32{5, 6, 7, 8}
	expect := [4]int32{6, 8, 10, 12}
	a := simd.LoadInt32x4(&left)
	b := simd.LoadInt32x4(&right)
	add := simd.Int32x4.Add // method expression used as a plain func value
	sink = add
	a = add(a, b)
	var out [4]int32
	a.Store(&out)
	for i, w := range expect {
		if w != out[i] {
			t.Errorf("Result at %d incorrect: want %d, got %d", i, w, out[i])
		}
	}
}
// TestReflectMethod checks that a SIMD intrinsic is visible through the
// reflect API and callable via the reflected func value's Interface().
// NOTE: we don't yet support reflect method.Call.
func TestReflectMethod(t *testing.T) {
	av := [4]int32{1, 2, 3, 4}
	bv := [4]int32{5, 6, 7, 8}
	expect := [4]int32{6, 8, 10, 12}
	a := simd.LoadInt32x4(&av)
	b := simd.LoadInt32x4(&bv)
	method, found := reflect.TypeOf(a).MethodByName("Add")
	if !found {
		t.Fatal("Add method not found")
	}
	add := method.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4)
	a = add(a, b)
	var out [4]int32
	a.Store(&out)
	for i, w := range expect {
		if w != out[i] {
			t.Errorf("Result at %d incorrect: want %d, got %d", i, w, out[i])
		}
	}
}
// TestVectorConversion round-trips an Int32x4 through AsInt64x2 and back,
// verifying that the bit-reinterpreting conversions preserve lane contents.
func TestVectorConversion(t *testing.T) {
	if !simd.HasAVX512GFNI() {
		// The guard is HasAVX512GFNI, so the skip message names that
		// feature (the previous message claimed plain HasAVX512).
		t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
		return
	}
	xv := [4]int32{1, 2, 3, 4}
	x := simd.LoadInt32x4(&xv)
	xPromoted := x.AsInt64x2()
	xPromotedDemoted := xPromoted.AsInt32x4()
	got := [4]int32{}
	xPromotedDemoted.Store(&got)
	for i := range 4 {
		if xv[i] != got[i] {
			t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i])
		}
	}
}
// TestMaskConversion builds a mask from vector lanes via ToMask (nonzero
// lanes become true) and applies it with Masked to zero out the false lanes.
func TestMaskConversion(t *testing.T) {
	if !simd.HasAVX512GFNI() {
		// The guard is HasAVX512GFNI, so the skip message names that
		// feature (the previous message claimed plain HasAVX512).
		t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
		return
	}
	x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
	// 0 - x is nonzero exactly where x is nonzero, so ToMask selects
	// lanes 0 and 2.
	mask := simd.Int32x4{}.Sub(x).ToMask()
	y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
	want := [4]int32{6, 0, 10, 0}
	got := make([]int32, 4)
	y.StoreSlice(got)
	for i := range 4 {
		if want[i] != got[i] {
			t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
		}
	}
}
// TestPermute reverses an 8-lane int64 vector by permuting with a
// descending index vector.
func TestPermute(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	src := []int64{1, 2, 3, 4, 5, 6, 7, 8}
	idx := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
	expect := []int64{8, 7, 6, 5, 4, 3, 2, 1}
	out := make([]int64, 8)
	shuffled := simd.LoadInt64x8Slice(src).Permute(simd.LoadUint64x8Slice(idx))
	shuffled.StoreSlice(out)
	for i, w := range expect {
		if w != out[i] {
			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, w, out[i])
		}
	}
}
// TestPermute2 selects lanes from two source vectors: indices 0-7 pick from
// the receiver, indices 8-15 pick from the second operand.
func TestPermute2(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	first := []int64{1, 2, 3, 4, 5, 6, 7, 8}
	second := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
	// Odd picks come from `second` (index + 8), even picks from `first`.
	idx := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
	expect := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
	out := make([]int64, 8)
	mixed := simd.LoadInt64x8Slice(first).Permute2(simd.LoadInt64x8Slice(second), simd.LoadUint64x8Slice(idx))
	mixed.StoreSlice(out)
	for i, w := range expect {
		if w != out[i] {
			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, w, out[i])
		}
	}
}
// TestCompress packs the mask-selected lanes (1 and 3) to the front of the
// result, zero-filling the rest.
func TestCompress(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	src := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	packed := src.Compress(simd.Mask32x4FromBits(0b1010))
	got := make([]int32, 4)
	packed.StoreSlice(got)
	if want := []int32{2, 4, 0, 0}; !slices.Equal(got, want) {
		t.Errorf("want and got differ, want=%v, got=%v", want, got)
	}
}
// TestExpand is the inverse of Compress: the leading source lanes are
// scattered into the mask-selected positions (1 and 3), zeros elsewhere.
func TestExpand(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	src := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
	spread := src.Expand(simd.Mask32x4FromBits(0b1010))
	got := make([]int32, 4)
	spread.StoreSlice(got)
	if want := []int32{0, 3, 0, 4}; !slices.Equal(got, want) {
		t.Errorf("want and got differ, want=%v, got=%v", want, got)
	}
}
// testShiftAllVal is deliberately a package-level variable so the shift
// amount below is not a compile-time constant.
var testShiftAllVal uint64 = 3

// TestShiftAll exercises ShiftAllLeft with both a constant and a
// non-constant shift amount.
func TestShiftAll(t *testing.T) {
	out := make([]int32, 4)
	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(out)
	for _, lane := range out {
		if lane != 0b1100 {
			t.Errorf("expect 0b1100, got %b", lane)
		}
	}
	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(out)
	for _, lane := range out {
		if lane != 0b11000 {
			t.Errorf("expect 0b11000, got %b", lane)
		}
	}
}
// TestSlicesInt8 round-trips 32 bytes through a load/store slice pair.
func TestSlicesInt8(t *testing.T) {
	src := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	out := make([]int8, 32)
	simd.LoadInt8x32Slice(src).StoreSlice(out)
	checkSlices(t, src, out)
}
// TestSlicesInt8SetElem overwrites one lane with SetElem and mirrors the
// change in the reference slice before comparing.
func TestSlicesInt8SetElem(t *testing.T) {
	src := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	v := simd.LoadInt8x16Slice(src).SetElem(3, 13)
	src[3] = 13 // keep the reference data in sync with the vector
	out := make([]int8, 16)
	v.StoreSlice(out)
	checkSlices(t, src, out)
}
// TestSlicesInt8GetElem reads a single lane back out of a loaded vector.
func TestSlicesInt8GetElem(t *testing.T) {
	src := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	if got := simd.LoadInt8x16Slice(src).GetElem(2); got != src[2] {
		t.Errorf("GetElem(2) = %d != a[2] = %d", got, src[2])
	}
}
// TestSlicesInt8TooShortLoad verifies that loading a 32-lane vector from a
// 31-element slice panics.
func TestSlicesInt8TooShortLoad(t *testing.T) {
	defer func() {
		if r := recover(); r == nil {
			t.Errorf("Did not see expected panic")
		} else {
			t.Logf("Saw EXPECTED panic %v", r)
		}
	}()
	src := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic
	v := simd.LoadInt8x32Slice(src)
	// Unreachable when the load panics as expected.
	out := make([]int8, 32)
	v.StoreSlice(out)
	checkSlices(t, src, out)
}
// TestSlicesInt8TooShortStore verifies that storing a 32-lane vector into a
// 31-element slice panics.
func TestSlicesInt8TooShortStore(t *testing.T) {
	defer func() {
		if r := recover(); r == nil {
			t.Errorf("Did not see expected panic")
		} else {
			t.Logf("Saw EXPECTED panic %v", r)
		}
	}()
	src := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	v := simd.LoadInt8x32Slice(src)
	out := make([]int8, 31) // TOO SHORT, should panic
	v.StoreSlice(out)
	// Unreachable when the store panics as expected.
	checkSlices(t, src, out)
}
// TestSlicesFloat64 loads a 4-lane vector from a longer slice (extra
// elements are ignored) and checks the stored prefix matches.
func TestSlicesFloat64(t *testing.T) {
	src := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine
	out := make([]float64, 4)
	simd.LoadFloat64x4Slice(src).StoreSlice(out)
	for i, g := range out {
		if src[i] != g {
			t.Errorf("a and b differ at index %d, a=%f, b=%f", i, src[i], g)
		}
	}
}
// TODO: try to reduce this test to be smaller.

// TestMergeLocals checks that SIMD vector values survive spills and reloads
// across a call; the actual work is in testMergeLocalswrapper.
func TestMergeLocals(t *testing.T) {
	testMergeLocalswrapper(t, simd.Int64x4.Add)
}
// forceSpill is an empty function kept out of line so that calling it forces
// the caller's live SIMD values to be spilled across the call.
//
//go:noinline
func forceSpill() {}
// testMergeLocalswrapper loads two vectors, forces a spill across a call,
// then applies op and compares the lanes against the expected sums.
func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) {
	t.Helper()
	a := simd.LoadInt64x4Slice([]int64{0, 1, 2, 3})
	b := simd.LoadInt64x4Slice([]int64{-1, 0, -1, 0})
	// The call boundary forces a and b out of registers.
	forceSpill()
	expect := []int64{-1, 1, 1, 3}
	out := make([]int64, 4)
	op(a, b).StoreSlice(out)
	for i, w := range expect {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
// TestBitMaskLoad builds a mask from a bit pattern in memory and uses it to
// keep only the selected lane of a vector sum.
func TestBitMaskLoad(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	var bits uint64 = 0b10
	mask := simd.LoadMask64x2FromBits(&bits)
	sum := simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4}))
	var out [2]int64
	sum.Masked(mask).Store(&out)
	expect := [2]int64{0, 6}
	for i, w := range expect {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
// TestBitMaskStore converts a lane-comparison mask into a bit pattern in
// memory via StoreToBits.
func TestBitMaskStore(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	a := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	b := simd.LoadInt32x4Slice([]int32{5, 0, 5, 0})
	var got uint64
	// Lanes 0 and 2 satisfy b > a, so bits 0 and 2 should be set.
	b.Greater(a).StoreToBits(&got)
	const want uint64 = 0b101
	if got != want {
		t.Errorf("Result incorrect: want %b, got %b", want, got)
	}
}
// TestBitMaskFromBits builds a mask from an immediate bit pattern and uses
// it to keep only the selected lane of a vector sum.
func TestBitMaskFromBits(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	mask := simd.Mask64x2FromBits(0b10)
	sum := simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4}))
	var out [2]int64
	sum.Masked(mask).Store(&out)
	expect := [2]int64{0, 6}
	for i, w := range expect {
		if out[i] != w {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, w, out[i])
		}
	}
}
// TestBitMaskToBits converts a lane mask into its bit-pattern form: the
// nonzero lanes (0 and 2) become set bits.
func TestBitMaskToBits(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	mask := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask()
	if bits := mask.ToBits(); bits != 0b101 {
		t.Errorf("Want 0b101, got %b", bits)
	}
}
// TestMergeFloat checks Greater followed by Merge on 256-bit float vectors:
// Merge takes the receiver's lane where the mask is set, the operand's
// otherwise.
func TestMergeFloat(t *testing.T) {
	lhs := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
	rhs := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
	gt := lhs.Greater(rhs)
	lanes := make([]int64, 4)
	gt.AsInt64x4().StoreSlice(lanes)
	// Only lane 3 has lhs > rhs.
	checkSlices[int64](t, lanes, []int64{0, 0, 0, -1})
	merged := lhs.Merge(rhs, gt)
	out := make([]float64, 4)
	merged.StoreSlice(out)
	checkSlices[float64](t, out, []float64{4, 2, 3, 4})
}
// TestMergeFloat512 is the 512-bit variant of TestMergeFloat, and also
// checks Masked, which zeroes the lanes where the mask is clear.
func TestMergeFloat512(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	lhs := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
	rhs := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
	gt := lhs.Greater(rhs)
	lanes := make([]int64, 8)
	gt.AsInt64x8().StoreSlice(lanes)
	// The upper four lanes have lhs > rhs.
	checkSlices[int64](t, lanes, []int64{0, 0, 0, 0, -1, -1, -1, -1})
	merged := lhs.Merge(rhs, gt)
	kept := lhs.Masked(gt)
	out := make([]float64, 8)
	merged.StoreSlice(out)
	checkSlices[float64](t, out, []float64{8, 7, 6, 5, 5, 6, 7, 8})
	kept.StoreSlice(out)
	checkSlices[float64](t, out, []float64{0, 0, 0, 0, 5, 6, 7, 8})
}
// ro is deliberately a package-level variable so the rotate amount below is
// not a compile-time constant.
var ro uint8 = 2

// TestRotateAllVariable exercises RotateAllLeft with a non-constant rotate
// amount.
func TestRotateAllVariable(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
		return
	}
	out := make([]int32, 4)
	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(out)
	for _, lane := range out {
		if lane != 0b1100 {
			t.Errorf("Want 0b1100, got %b", lane)
		}
	}
}
// TestBroadcastUint32x4 checks that a scalar is replicated into all four
// lanes.
func TestBroadcastUint32x4(t *testing.T) {
	out := make([]uint32, 4)
	simd.BroadcastUint32x4(123456789).StoreSlice(out)
	checkSlices(t, out, []uint32{123456789, 123456789, 123456789, 123456789})
}
// TestBroadcastFloat32x8 checks that a scalar is replicated into all eight
// lanes.
func TestBroadcastFloat32x8(t *testing.T) {
	out := make([]float32, 8)
	simd.BroadcastFloat32x8(123456789).StoreSlice(out)
	checkSlices(t, out, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
}