diff --git a/src/simd/compare_gen_amd64.go b/src/simd/compare_gen_amd64.go new file mode 100644 index 00000000000..65919fe4031 --- /dev/null +++ b/src/simd/compare_gen_amd64.go @@ -0,0 +1,641 @@ +// Code generated by 'go run genfiles.go'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Int8x16) Less(y Int8x16) Mask8x16 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 { + ones := x.Equal(x).AsInt8x16() + return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Int8x16) LessEqual(y Int8x16) Mask8x16 { + ones := x.Equal(x).AsInt8x16() + return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Int8x16) NotEqual(y Int8x16) Mask8x16 { + ones := x.Equal(x).AsInt8x16() + return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16() +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Int16x8) Less(y Int16x8) Mask16x8 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 { + ones := x.Equal(x).AsInt16x8() + return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Int16x8) LessEqual(y Int16x8) Mask16x8 { + ones := x.Equal(x).AsInt16x8() + return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Int16x8) 
NotEqual(y Int16x8) Mask16x8 { + ones := x.Equal(x).AsInt16x8() + return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8() +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Int32x4) Less(y Int32x4) Mask32x4 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 { + ones := x.Equal(x).AsInt32x4() + return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Int32x4) LessEqual(y Int32x4) Mask32x4 { + ones := x.Equal(x).AsInt32x4() + return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Int32x4) NotEqual(y Int32x4) Mask32x4 { + ones := x.Equal(x).AsInt32x4() + return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4() +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Int64x2) Less(y Int64x2) Mask64x2 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 { + ones := x.Equal(x).AsInt64x2() + return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Int64x2) LessEqual(y Int64x2) Mask64x2 { + ones := x.Equal(x).AsInt64x2() + return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Int64x2) NotEqual(y Int64x2) Mask64x2 { + ones := x.Equal(x).AsInt64x2() + return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2() +} + +// Less returns a mask whose elements indicate 
whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Int8x32) Less(y Int8x32) Mask8x32 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 { + ones := x.Equal(x).AsInt8x32() + return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Int8x32) LessEqual(y Int8x32) Mask8x32 { + ones := x.Equal(x).AsInt8x32() + return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Int8x32) NotEqual(y Int8x32) Mask8x32 { + ones := x.Equal(x).AsInt8x32() + return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32() +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Int16x16) Less(y Int16x16) Mask16x16 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 { + ones := x.Equal(x).AsInt16x16() + return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Int16x16) LessEqual(y Int16x16) Mask16x16 { + ones := x.Equal(x).AsInt16x16() + return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Int16x16) NotEqual(y Int16x16) Mask16x16 { + ones := x.Equal(x).AsInt16x16() + return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16() +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Int32x8) Less(y Int32x8) Mask32x8 { + return y.Greater(x) +} + +// GreaterEqual returns a 
mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 { + ones := x.Equal(x).AsInt32x8() + return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Int32x8) LessEqual(y Int32x8) Mask32x8 { + ones := x.Equal(x).AsInt32x8() + return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Int32x8) NotEqual(y Int32x8) Mask32x8 { + ones := x.Equal(x).AsInt32x8() + return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8() +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Int64x4) Less(y Int64x4) Mask64x4 { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 { + ones := x.Equal(x).AsInt64x4() + return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Int64x4) LessEqual(y Int64x4) Mask64x4 { + ones := x.Equal(x).AsInt64x4() + return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Int64x4) NotEqual(y Int64x4) Mask64x4 { + ones := x.Equal(x).AsInt64x4() + return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x16) Greater(y Uint8x16) Mask8x16 { + a, b := x.AsInt8x16(), y.AsInt8x16() + signs := BroadcastInt8x16(-1 << (8 - 1)) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU 
Feature AVX2 +func (x Uint8x16) Less(y Uint8x16) Mask8x16 { + a, b := x.AsInt8x16(), y.AsInt8x16() + signs := BroadcastInt8x16(-1 << (8 - 1)) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { + a, b := x.AsInt8x16(), y.AsInt8x16() + ones := x.Equal(x).AsInt8x16() + signs := BroadcastInt8x16(-1 << (8 - 1)) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { + a, b := x.AsInt8x16(), y.AsInt8x16() + ones := x.Equal(x).AsInt8x16() + signs := BroadcastInt8x16(-1 << (8 - 1)) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 { + a, b := x.AsInt8x16(), y.AsInt8x16() + ones := x.Equal(x).AsInt8x16() + return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX +func (x Uint16x8) Greater(y Uint16x8) Mask16x8 { + a, b := x.AsInt16x8(), y.AsInt16x8() + ones := x.Equal(x).AsInt16x8() + signs := ones.ShiftAllLeft(16 - 1) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Uint16x8) Less(y Uint16x8) Mask16x8 { + a, b := x.AsInt16x8(), y.AsInt16x8() + ones := x.Equal(x).AsInt16x8() + signs := ones.ShiftAllLeft(16 - 1) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { + a, b := x.AsInt16x8(), y.AsInt16x8() + 
ones := x.Equal(x).AsInt16x8() + signs := ones.ShiftAllLeft(16 - 1) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { + a, b := x.AsInt16x8(), y.AsInt16x8() + ones := x.Equal(x).AsInt16x8() + signs := ones.ShiftAllLeft(16 - 1) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 { + a, b := x.AsInt16x8(), y.AsInt16x8() + ones := x.Equal(x).AsInt16x8() + return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX +func (x Uint32x4) Greater(y Uint32x4) Mask32x4 { + a, b := x.AsInt32x4(), y.AsInt32x4() + ones := x.Equal(x).AsInt32x4() + signs := ones.ShiftAllLeft(32 - 1) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Uint32x4) Less(y Uint32x4) Mask32x4 { + a, b := x.AsInt32x4(), y.AsInt32x4() + ones := x.Equal(x).AsInt32x4() + signs := ones.ShiftAllLeft(32 - 1) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { + a, b := x.AsInt32x4(), y.AsInt32x4() + ones := x.Equal(x).AsInt32x4() + signs := ones.ShiftAllLeft(32 - 1) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { + a, b := x.AsInt32x4(), y.AsInt32x4() + ones := x.Equal(x).AsInt32x4() + signs := 
ones.ShiftAllLeft(32 - 1) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 { + a, b := x.AsInt32x4(), y.AsInt32x4() + ones := x.Equal(x).AsInt32x4() + return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX +func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { + a, b := x.AsInt64x2(), y.AsInt64x2() + ones := x.Equal(x).AsInt64x2() + signs := ones.ShiftAllLeft(64 - 1) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX +func (x Uint64x2) Less(y Uint64x2) Mask64x2 { + a, b := x.AsInt64x2(), y.AsInt64x2() + ones := x.Equal(x).AsInt64x2() + signs := ones.ShiftAllLeft(64 - 1) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX +func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { + a, b := x.AsInt64x2(), y.AsInt64x2() + ones := x.Equal(x).AsInt64x2() + signs := ones.ShiftAllLeft(64 - 1) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX +func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { + a, b := x.AsInt64x2(), y.AsInt64x2() + ones := x.Equal(x).AsInt64x2() + signs := ones.ShiftAllLeft(64 - 1) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX +func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 { + a, b := x.AsInt64x2(), y.AsInt64x2() + ones := x.Equal(x).AsInt64x2() + return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2() +} + +// Greater 
returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x32) Greater(y Uint8x32) Mask8x32 { + a, b := x.AsInt8x32(), y.AsInt8x32() + signs := BroadcastInt8x32(-1 << (8 - 1)) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x32) Less(y Uint8x32) Mask8x32 { + a, b := x.AsInt8x32(), y.AsInt8x32() + signs := BroadcastInt8x32(-1 << (8 - 1)) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { + a, b := x.AsInt8x32(), y.AsInt8x32() + ones := x.Equal(x).AsInt8x32() + signs := BroadcastInt8x32(-1 << (8 - 1)) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { + a, b := x.AsInt8x32(), y.AsInt8x32() + ones := x.Equal(x).AsInt8x32() + signs := BroadcastInt8x32(-1 << (8 - 1)) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 { + a, b := x.AsInt8x32(), y.AsInt8x32() + ones := x.Equal(x).AsInt8x32() + return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX2 +func (x Uint16x16) Greater(y Uint16x16) Mask16x16 { + a, b := x.AsInt16x16(), y.AsInt16x16() + ones := x.Equal(x).AsInt16x16() + signs := ones.ShiftAllLeft(16 - 1) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Uint16x16) Less(y Uint16x16) 
Mask16x16 { + a, b := x.AsInt16x16(), y.AsInt16x16() + ones := x.Equal(x).AsInt16x16() + signs := ones.ShiftAllLeft(16 - 1) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { + a, b := x.AsInt16x16(), y.AsInt16x16() + ones := x.Equal(x).AsInt16x16() + signs := ones.ShiftAllLeft(16 - 1) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { + a, b := x.AsInt16x16(), y.AsInt16x16() + ones := x.Equal(x).AsInt16x16() + signs := ones.ShiftAllLeft(16 - 1) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 { + a, b := x.AsInt16x16(), y.AsInt16x16() + ones := x.Equal(x).AsInt16x16() + return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX2 +func (x Uint32x8) Greater(y Uint32x8) Mask32x8 { + a, b := x.AsInt32x8(), y.AsInt32x8() + ones := x.Equal(x).AsInt32x8() + signs := ones.ShiftAllLeft(32 - 1) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Uint32x8) Less(y Uint32x8) Mask32x8 { + a, b := x.AsInt32x8(), y.AsInt32x8() + ones := x.Equal(x).AsInt32x8() + signs := ones.ShiftAllLeft(32 - 1) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { + a, b := x.AsInt32x8(), y.AsInt32x8() + 
ones := x.Equal(x).AsInt32x8() + signs := ones.ShiftAllLeft(32 - 1) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { + a, b := x.AsInt32x8(), y.AsInt32x8() + ones := x.Equal(x).AsInt32x8() + signs := ones.ShiftAllLeft(32 - 1) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 { + a, b := x.AsInt32x8(), y.AsInt32x8() + ones := x.Equal(x).AsInt32x8() + return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8() +} + +// Greater returns a mask whose elements indicate whether x > y +// +// Emulated, CPU Feature AVX2 +func (x Uint64x4) Greater(y Uint64x4) Mask64x4 { + a, b := x.AsInt64x4(), y.AsInt64x4() + ones := x.Equal(x).AsInt64x4() + signs := ones.ShiftAllLeft(64 - 1) + return a.Xor(signs).Greater(b.Xor(signs)) +} + +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature AVX2 +func (x Uint64x4) Less(y Uint64x4) Mask64x4 { + a, b := x.AsInt64x4(), y.AsInt64x4() + ones := x.Equal(x).AsInt64x4() + signs := ones.ShiftAllLeft(64 - 1) + return b.Xor(signs).Greater(a.Xor(signs)) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature AVX2 +func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { + a, b := x.AsInt64x4(), y.AsInt64x4() + ones := x.Equal(x).AsInt64x4() + signs := ones.ShiftAllLeft(64 - 1) + return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature AVX2 +func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { + a, b := x.AsInt64x4(), y.AsInt64x4() + ones := x.Equal(x).AsInt64x4() + signs := 
ones.ShiftAllLeft(64 - 1) + return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature AVX2 +func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 { + a, b := x.AsInt64x4(), y.AsInt64x4() + ones := x.Equal(x).AsInt64x4() + return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4() +} diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go index 022ddd16813..a1da5ad0561 100644 --- a/src/simd/genfiles.go +++ b/src/simd/genfiles.go @@ -175,8 +175,6 @@ func prologue(s string, out io.Writer) { package simd -import "unsafe" - `, s) } @@ -708,7 +706,10 @@ func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} { `) func main() { - sl := flag.String("sl", "slice_amd64.go", "file name for slice operations") + sl := flag.String("sl", "slice_gen_amd64.go", "file name for slice operations") + cm := flag.String("cm", "compare_gen_amd64.go", "file name for comparison operations") + mm := flag.String("mm", "maskmerge_gen_amd64.go", "file name for mask/merge operations") + op := flag.String("op", "other_gen_amd64.go", "file name for other operations") ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers") bh := flag.String("bh", "binary_helpers_test.go", "file name for binary test helpers") uh := flag.String("uh", "unary_helpers_test.go", "file name for unary test helpers") @@ -718,15 +719,27 @@ func main() { flag.Parse() if *sl != "" { - one(*sl, prologue, + one(*sl, unsafePrologue, sliceTemplate, avx512MaskedLoadSlicePartTemplate, avx2MaskedLoadSlicePartTemplate, avx2SmallLoadSlicePartTemplate, - avx2MaskedTemplate, - avx512MaskedTemplate, + ) + } + if *cm != "" { + one(*cm, prologue, avx2SignedComparisonsTemplate, avx2UnsignedComparisonsTemplate, + ) + } + if *mm != "" { + one(*mm, prologue, + avx2MaskedTemplate, + avx512MaskedTemplate, + ) + } + if *op != "" { + one(*op, prologue, broadcastTemplate, ) } diff --git a/src/simd/maskmerge_gen_amd64.go 
b/src/simd/maskmerge_gen_amd64.go new file mode 100644 index 00000000000..71a617c4250 --- /dev/null +++ b/src/simd/maskmerge_gen_amd64.go @@ -0,0 +1,403 @@ +// Code generated by 'go run genfiles.go'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd + +// Masked returns x but with elements zeroed where mask is false. +func (x Int8x16) Masked(mask Mask8x16) Int8x16 { + im := mask.AsInt8x16() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int8x16) Merge(y Int8x16, mask Mask8x16) Int8x16 { + im := mask.AsInt8x16() + return y.blend(x, im) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int16x8) Masked(mask Mask16x8) Int16x8 { + im := mask.AsInt16x8() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 { + im := mask.AsInt16x8().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsInt16x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int32x4) Masked(mask Mask32x4) Int32x4 { + im := mask.AsInt32x4() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 { + im := mask.AsInt32x4().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsInt32x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int64x2) Masked(mask Mask64x2) Int64x2 { + im := mask.AsInt64x2() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 { + im := mask.AsInt64x2().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsInt64x2() +} + +// Masked returns x but with elements zeroed where mask is false. 
+func (x Uint8x16) Masked(mask Mask8x16) Uint8x16 { + im := mask.AsInt8x16() + return x.AsInt8x16().And(im).AsUint8x16() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 { + im := mask.AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint8x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint16x8) Masked(mask Mask16x8) Uint16x8 { + im := mask.AsInt16x8() + return x.AsInt16x8().And(im).AsUint16x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 { + im := mask.AsInt16x8().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint16x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint32x4) Masked(mask Mask32x4) Uint32x4 { + im := mask.AsInt32x4() + return x.AsInt32x4().And(im).AsUint32x4() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 { + im := mask.AsInt32x4().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint32x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint64x2) Masked(mask Mask64x2) Uint64x2 { + im := mask.AsInt64x2() + return x.AsInt64x2().And(im).AsUint64x2() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 { + im := mask.AsInt64x2().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint64x2() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float32x4) Masked(mask Mask32x4) Float32x4 { + im := mask.AsInt32x4() + return x.AsInt32x4().And(im).AsFloat32x4() +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 { + im := mask.AsInt32x4().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsFloat32x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float64x2) Masked(mask Mask64x2) Float64x2 { + im := mask.AsInt64x2() + return x.AsInt64x2().And(im).AsFloat64x2() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 { + im := mask.AsInt64x2().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsFloat64x2() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int8x32) Masked(mask Mask8x32) Int8x32 { + im := mask.AsInt8x32() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int8x32) Merge(y Int8x32, mask Mask8x32) Int8x32 { + im := mask.AsInt8x32() + return y.blend(x, im) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int16x16) Masked(mask Mask16x16) Int16x16 { + im := mask.AsInt16x16() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 { + im := mask.AsInt16x16().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsInt16x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int32x8) Masked(mask Mask32x8) Int32x8 { + im := mask.AsInt32x8() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 { + im := mask.AsInt32x8().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsInt32x8() +} + +// Masked returns x but with elements zeroed where mask is false. 
+func (x Int64x4) Masked(mask Mask64x4) Int64x4 { + im := mask.AsInt64x4() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 { + im := mask.AsInt64x4().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsInt64x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint8x32) Masked(mask Mask8x32) Uint8x32 { + im := mask.AsInt8x32() + return x.AsInt8x32().And(im).AsUint8x32() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 { + im := mask.AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint8x32() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint16x16) Masked(mask Mask16x16) Uint16x16 { + im := mask.AsInt16x16() + return x.AsInt16x16().And(im).AsUint16x16() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 { + im := mask.AsInt16x16().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint16x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint32x8) Masked(mask Mask32x8) Uint32x8 { + im := mask.AsInt32x8() + return x.AsInt32x8().And(im).AsUint32x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 { + im := mask.AsInt32x8().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint32x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint64x4) Masked(mask Mask64x4) Uint64x4 { + im := mask.AsInt64x4() + return x.AsInt64x4().And(im).AsUint64x4() +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 { + im := mask.AsInt64x4().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint64x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float32x8) Masked(mask Mask32x8) Float32x8 { + im := mask.AsInt32x8() + return x.AsInt32x8().And(im).AsFloat32x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 { + im := mask.AsInt32x8().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsFloat32x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float64x4) Masked(mask Mask64x4) Float64x4 { + im := mask.AsInt64x4() + return x.AsInt64x4().And(im).AsFloat64x4() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 { + im := mask.AsInt64x4().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsFloat64x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int8x64) Masked(mask Mask8x64) Int8x64 { + im := mask.AsInt8x64() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int16x32) Masked(mask Mask16x32) Int16x32 { + im := mask.AsInt16x32() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int32x16) Masked(mask Mask32x16) Int32x16 { + im := mask.AsInt32x16() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int64x8) Masked(mask Mask64x8) Int64x8 { + im := mask.AsInt64x8() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 { + im := mask.AsInt8x64() + return x.AsInt8x64().And(im).AsUint8x64() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { + ix := x.AsInt8x64() + iy := y.AsInt8x64() + return iy.blendMasked(ix, mask).AsUint8x64() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 { + im := mask.AsInt16x32() + return x.AsInt16x32().And(im).AsUint16x32() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { + ix := x.AsInt16x32() + iy := y.AsInt16x32() + return iy.blendMasked(ix, mask).AsUint16x32() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 { + im := mask.AsInt32x16() + return x.AsInt32x16().And(im).AsUint32x16() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { + ix := x.AsInt32x16() + iy := y.AsInt32x16() + return iy.blendMasked(ix, mask).AsUint32x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 { + im := mask.AsInt64x8() + return x.AsInt64x8().And(im).AsUint64x8() +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { + ix := x.AsInt64x8() + iy := y.AsInt64x8() + return iy.blendMasked(ix, mask).AsUint64x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float32x16) Masked(mask Mask32x16) Float32x16 { + im := mask.AsInt32x16() + return x.AsInt32x16().And(im).AsFloat32x16() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { + ix := x.AsInt32x16() + iy := y.AsInt32x16() + return iy.blendMasked(ix, mask).AsFloat32x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float64x8) Masked(mask Mask64x8) Float64x8 { + im := mask.AsInt64x8() + return x.AsInt64x8().And(im).AsFloat64x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 { + ix := x.AsInt64x8() + iy := y.AsInt64x8() + return iy.blendMasked(ix, mask).AsFloat64x8() +} diff --git a/src/simd/other_gen_amd64.go b/src/simd/other_gen_amd64.go new file mode 100644 index 00000000000..ed9394cf7d3 --- /dev/null +++ b/src/simd/other_gen_amd64.go @@ -0,0 +1,275 @@ +// Code generated by 'go run genfiles.go'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd + +// BroadcastInt8x16 returns a vector with the input +// x assigned to all 16 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastInt8x16(x int8) Int8x16 { + var z Int8x16 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastInt16x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastInt16x8(x int16) Int16x8 { + var z Int16x8 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastInt32x4 returns a vector with the input +// x assigned to all 4 elements of the output. 
+// +// Emulated, CPU Feature AVX2 +func BroadcastInt32x4(x int32) Int32x4 { + var z Int32x4 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastInt64x2 returns a vector with the input +// x assigned to all 2 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastInt64x2(x int64) Int64x2 { + var z Int64x2 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastUint8x16 returns a vector with the input +// x assigned to all 16 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint8x16(x uint8) Uint8x16 { + var z Uint8x16 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastUint16x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint16x8(x uint16) Uint16x8 { + var z Uint16x8 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastUint32x4 returns a vector with the input +// x assigned to all 4 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint32x4(x uint32) Uint32x4 { + var z Uint32x4 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastUint64x2 returns a vector with the input +// x assigned to all 2 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint64x2(x uint64) Uint64x2 { + var z Uint64x2 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastFloat32x4 returns a vector with the input +// x assigned to all 4 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastFloat32x4(x float32) Float32x4 { + var z Float32x4 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastFloat64x2 returns a vector with the input +// x assigned to all 2 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastFloat64x2(x float64) Float64x2 { + var z Float64x2 + return z.SetElem(0, x).Broadcast128() +} + +// BroadcastInt8x32 returns a vector with the input +// x assigned to all 32 elements of the output. 
+// +// Emulated, CPU Feature AVX2 +func BroadcastInt8x32(x int8) Int8x32 { + var z Int8x16 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastInt16x16 returns a vector with the input +// x assigned to all 16 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastInt16x16(x int16) Int16x16 { + var z Int16x8 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastInt32x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastInt32x8(x int32) Int32x8 { + var z Int32x4 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastInt64x4 returns a vector with the input +// x assigned to all 4 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastInt64x4(x int64) Int64x4 { + var z Int64x2 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastUint8x32 returns a vector with the input +// x assigned to all 32 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint8x32(x uint8) Uint8x32 { + var z Uint8x16 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastUint16x16 returns a vector with the input +// x assigned to all 16 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint16x16(x uint16) Uint16x16 { + var z Uint16x8 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastUint32x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint32x8(x uint32) Uint32x8 { + var z Uint32x4 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastUint64x4 returns a vector with the input +// x assigned to all 4 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastUint64x4(x uint64) Uint64x4 { + var z Uint64x2 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastFloat32x8 returns a vector with the input +// x assigned to all 8 elements of the output. 
+// +// Emulated, CPU Feature AVX2 +func BroadcastFloat32x8(x float32) Float32x8 { + var z Float32x4 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastFloat64x4 returns a vector with the input +// x assigned to all 4 elements of the output. +// +// Emulated, CPU Feature AVX2 +func BroadcastFloat64x4(x float64) Float64x4 { + var z Float64x2 + return z.SetElem(0, x).Broadcast256() +} + +// BroadcastInt8x64 returns a vector with the input +// x assigned to all 64 elements of the output. +// +// Emulated, CPU Feature AVX512BW +func BroadcastInt8x64(x int8) Int8x64 { + var z Int8x16 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastInt16x32 returns a vector with the input +// x assigned to all 32 elements of the output. +// +// Emulated, CPU Feature AVX512BW +func BroadcastInt16x32(x int16) Int16x32 { + var z Int16x8 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastInt32x16 returns a vector with the input +// x assigned to all 16 elements of the output. +// +// Emulated, CPU Feature AVX512F +func BroadcastInt32x16(x int32) Int32x16 { + var z Int32x4 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastInt64x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX512F +func BroadcastInt64x8(x int64) Int64x8 { + var z Int64x2 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastUint8x64 returns a vector with the input +// x assigned to all 64 elements of the output. +// +// Emulated, CPU Feature AVX512BW +func BroadcastUint8x64(x uint8) Uint8x64 { + var z Uint8x16 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastUint16x32 returns a vector with the input +// x assigned to all 32 elements of the output. +// +// Emulated, CPU Feature AVX512BW +func BroadcastUint16x32(x uint16) Uint16x32 { + var z Uint16x8 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastUint32x16 returns a vector with the input +// x assigned to all 16 elements of the output. 
+// +// Emulated, CPU Feature AVX512F +func BroadcastUint32x16(x uint32) Uint32x16 { + var z Uint32x4 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastUint64x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX512F +func BroadcastUint64x8(x uint64) Uint64x8 { + var z Uint64x2 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastFloat32x16 returns a vector with the input +// x assigned to all 16 elements of the output. +// +// Emulated, CPU Feature AVX512F +func BroadcastFloat32x16(x float32) Float32x16 { + var z Float32x4 + return z.SetElem(0, x).Broadcast512() +} + +// BroadcastFloat64x8 returns a vector with the input +// x assigned to all 8 elements of the output. +// +// Emulated, CPU Feature AVX512F +func BroadcastFloat64x8(x float64) Float64x8 { + var z Float64x2 + return z.SetElem(0, x).Broadcast512() +} diff --git a/src/simd/slice_amd64.go b/src/simd/slice_amd64.go deleted file mode 100644 index 3ad2672a05b..00000000000 --- a/src/simd/slice_amd64.go +++ /dev/null @@ -1,2407 +0,0 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. 
- -//go:build goexperiment.simd - -package simd - -import "unsafe" - -// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s -func LoadInt8x16Slice(s []int8) Int8x16 { - return LoadInt8x16((*[16]int8)(s)) -} - -// StoreSlice stores x into a slice of at least 16 int8s -func (x Int8x16) StoreSlice(s []int8) { - x.Store((*[16]int8)(s)) -} - -// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s -func LoadInt16x8Slice(s []int16) Int16x8 { - return LoadInt16x8((*[8]int16)(s)) -} - -// StoreSlice stores x into a slice of at least 8 int16s -func (x Int16x8) StoreSlice(s []int16) { - x.Store((*[8]int16)(s)) -} - -// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s -func LoadInt32x4Slice(s []int32) Int32x4 { - return LoadInt32x4((*[4]int32)(s)) -} - -// StoreSlice stores x into a slice of at least 4 int32s -func (x Int32x4) StoreSlice(s []int32) { - x.Store((*[4]int32)(s)) -} - -// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s -func LoadInt64x2Slice(s []int64) Int64x2 { - return LoadInt64x2((*[2]int64)(s)) -} - -// StoreSlice stores x into a slice of at least 2 int64s -func (x Int64x2) StoreSlice(s []int64) { - x.Store((*[2]int64)(s)) -} - -// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s -func LoadUint8x16Slice(s []uint8) Uint8x16 { - return LoadUint8x16((*[16]uint8)(s)) -} - -// StoreSlice stores x into a slice of at least 16 uint8s -func (x Uint8x16) StoreSlice(s []uint8) { - x.Store((*[16]uint8)(s)) -} - -// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s -func LoadUint16x8Slice(s []uint16) Uint16x8 { - return LoadUint16x8((*[8]uint16)(s)) -} - -// StoreSlice stores x into a slice of at least 8 uint16s -func (x Uint16x8) StoreSlice(s []uint16) { - x.Store((*[8]uint16)(s)) -} - -// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s -func LoadUint32x4Slice(s []uint32) Uint32x4 { - return LoadUint32x4((*[4]uint32)(s)) -} - -// 
StoreSlice stores x into a slice of at least 4 uint32s -func (x Uint32x4) StoreSlice(s []uint32) { - x.Store((*[4]uint32)(s)) -} - -// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s -func LoadUint64x2Slice(s []uint64) Uint64x2 { - return LoadUint64x2((*[2]uint64)(s)) -} - -// StoreSlice stores x into a slice of at least 2 uint64s -func (x Uint64x2) StoreSlice(s []uint64) { - x.Store((*[2]uint64)(s)) -} - -// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s -func LoadFloat32x4Slice(s []float32) Float32x4 { - return LoadFloat32x4((*[4]float32)(s)) -} - -// StoreSlice stores x into a slice of at least 4 float32s -func (x Float32x4) StoreSlice(s []float32) { - x.Store((*[4]float32)(s)) -} - -// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s -func LoadFloat64x2Slice(s []float64) Float64x2 { - return LoadFloat64x2((*[2]float64)(s)) -} - -// StoreSlice stores x into a slice of at least 2 float64s -func (x Float64x2) StoreSlice(s []float64) { - x.Store((*[2]float64)(s)) -} - -// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s -func LoadInt8x32Slice(s []int8) Int8x32 { - return LoadInt8x32((*[32]int8)(s)) -} - -// StoreSlice stores x into a slice of at least 32 int8s -func (x Int8x32) StoreSlice(s []int8) { - x.Store((*[32]int8)(s)) -} - -// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s -func LoadInt16x16Slice(s []int16) Int16x16 { - return LoadInt16x16((*[16]int16)(s)) -} - -// StoreSlice stores x into a slice of at least 16 int16s -func (x Int16x16) StoreSlice(s []int16) { - x.Store((*[16]int16)(s)) -} - -// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s -func LoadInt32x8Slice(s []int32) Int32x8 { - return LoadInt32x8((*[8]int32)(s)) -} - -// StoreSlice stores x into a slice of at least 8 int32s -func (x Int32x8) StoreSlice(s []int32) { - x.Store((*[8]int32)(s)) -} - -// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 
int64s -func LoadInt64x4Slice(s []int64) Int64x4 { - return LoadInt64x4((*[4]int64)(s)) -} - -// StoreSlice stores x into a slice of at least 4 int64s -func (x Int64x4) StoreSlice(s []int64) { - x.Store((*[4]int64)(s)) -} - -// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s -func LoadUint8x32Slice(s []uint8) Uint8x32 { - return LoadUint8x32((*[32]uint8)(s)) -} - -// StoreSlice stores x into a slice of at least 32 uint8s -func (x Uint8x32) StoreSlice(s []uint8) { - x.Store((*[32]uint8)(s)) -} - -// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s -func LoadUint16x16Slice(s []uint16) Uint16x16 { - return LoadUint16x16((*[16]uint16)(s)) -} - -// StoreSlice stores x into a slice of at least 16 uint16s -func (x Uint16x16) StoreSlice(s []uint16) { - x.Store((*[16]uint16)(s)) -} - -// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s -func LoadUint32x8Slice(s []uint32) Uint32x8 { - return LoadUint32x8((*[8]uint32)(s)) -} - -// StoreSlice stores x into a slice of at least 8 uint32s -func (x Uint32x8) StoreSlice(s []uint32) { - x.Store((*[8]uint32)(s)) -} - -// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s -func LoadUint64x4Slice(s []uint64) Uint64x4 { - return LoadUint64x4((*[4]uint64)(s)) -} - -// StoreSlice stores x into a slice of at least 4 uint64s -func (x Uint64x4) StoreSlice(s []uint64) { - x.Store((*[4]uint64)(s)) -} - -// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s -func LoadFloat32x8Slice(s []float32) Float32x8 { - return LoadFloat32x8((*[8]float32)(s)) -} - -// StoreSlice stores x into a slice of at least 8 float32s -func (x Float32x8) StoreSlice(s []float32) { - x.Store((*[8]float32)(s)) -} - -// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s -func LoadFloat64x4Slice(s []float64) Float64x4 { - return LoadFloat64x4((*[4]float64)(s)) -} - -// StoreSlice stores x into a slice of at least 4 float64s -func (x 
Float64x4) StoreSlice(s []float64) { - x.Store((*[4]float64)(s)) -} - -// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s -func LoadInt8x64Slice(s []int8) Int8x64 { - return LoadInt8x64((*[64]int8)(s)) -} - -// StoreSlice stores x into a slice of at least 64 int8s -func (x Int8x64) StoreSlice(s []int8) { - x.Store((*[64]int8)(s)) -} - -// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s -func LoadInt16x32Slice(s []int16) Int16x32 { - return LoadInt16x32((*[32]int16)(s)) -} - -// StoreSlice stores x into a slice of at least 32 int16s -func (x Int16x32) StoreSlice(s []int16) { - x.Store((*[32]int16)(s)) -} - -// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s -func LoadInt32x16Slice(s []int32) Int32x16 { - return LoadInt32x16((*[16]int32)(s)) -} - -// StoreSlice stores x into a slice of at least 16 int32s -func (x Int32x16) StoreSlice(s []int32) { - x.Store((*[16]int32)(s)) -} - -// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s -func LoadInt64x8Slice(s []int64) Int64x8 { - return LoadInt64x8((*[8]int64)(s)) -} - -// StoreSlice stores x into a slice of at least 8 int64s -func (x Int64x8) StoreSlice(s []int64) { - x.Store((*[8]int64)(s)) -} - -// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s -func LoadUint8x64Slice(s []uint8) Uint8x64 { - return LoadUint8x64((*[64]uint8)(s)) -} - -// StoreSlice stores x into a slice of at least 64 uint8s -func (x Uint8x64) StoreSlice(s []uint8) { - x.Store((*[64]uint8)(s)) -} - -// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s -func LoadUint16x32Slice(s []uint16) Uint16x32 { - return LoadUint16x32((*[32]uint16)(s)) -} - -// StoreSlice stores x into a slice of at least 32 uint16s -func (x Uint16x32) StoreSlice(s []uint16) { - x.Store((*[32]uint16)(s)) -} - -// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s -func LoadUint32x16Slice(s []uint32) Uint32x16 { - return 
LoadUint32x16((*[16]uint32)(s)) -} - -// StoreSlice stores x into a slice of at least 16 uint32s -func (x Uint32x16) StoreSlice(s []uint32) { - x.Store((*[16]uint32)(s)) -} - -// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s -func LoadUint64x8Slice(s []uint64) Uint64x8 { - return LoadUint64x8((*[8]uint64)(s)) -} - -// StoreSlice stores x into a slice of at least 8 uint64s -func (x Uint64x8) StoreSlice(s []uint64) { - x.Store((*[8]uint64)(s)) -} - -// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s -func LoadFloat32x16Slice(s []float32) Float32x16 { - return LoadFloat32x16((*[16]float32)(s)) -} - -// StoreSlice stores x into a slice of at least 16 float32s -func (x Float32x16) StoreSlice(s []float32) { - x.Store((*[16]float32)(s)) -} - -// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s -func LoadFloat64x8Slice(s []float64) Float64x8 { - return LoadFloat64x8((*[8]float64)(s)) -} - -// StoreSlice stores x into a slice of at least 8 float64s -func (x Float64x8) StoreSlice(s []float64) { - x.Store((*[8]float64)(s)) -} - -// LoadInt8x64SlicePart loads a Int8x64 from the slice s. -// If s has fewer than 64 elements, the remaining elements of the vector are filled with zeroes. -// If s has 64 or more elements, the function is equivalent to LoadInt8x64Slice. -func LoadInt8x64SlicePart(s []int8) Int8x64 { - l := len(s) - if l >= 64 { - return LoadInt8x64Slice(s) - } - if l == 0 { - var x Int8x64 - return x - } - mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) - return LoadMaskedInt8x64(paInt8x64(s), mask) -} - -// StoreSlicePart stores the 64 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 64 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Int8x64) StoreSlicePart(s []int8) { - l := len(s) - if l >= 64 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) - x.StoreMasked(paInt8x64(s), mask) -} - -// LoadInt16x32SlicePart loads a Int16x32 from the slice s. -// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes. -// If s has 32 or more elements, the function is equivalent to LoadInt16x32Slice. -func LoadInt16x32SlicePart(s []int16) Int16x32 { - l := len(s) - if l >= 32 { - return LoadInt16x32Slice(s) - } - if l == 0 { - var x Int16x32 - return x - } - mask := Mask16x32FromBits(0xffffffff >> (32 - l)) - return LoadMaskedInt16x32(paInt16x32(s), mask) -} - -// StoreSlicePart stores the 32 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 32 or more elements, the method is equivalent to x.StoreSlice. -func (x Int16x32) StoreSlicePart(s []int16) { - l := len(s) - if l >= 32 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask16x32FromBits(0xffffffff >> (32 - l)) - x.StoreMasked(paInt16x32(s), mask) -} - -// LoadInt32x16SlicePart loads a Int32x16 from the slice s. -// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. -// If s has 16 or more elements, the function is equivalent to LoadInt32x16Slice. -func LoadInt32x16SlicePart(s []int32) Int32x16 { - l := len(s) - if l >= 16 { - return LoadInt32x16Slice(s) - } - if l == 0 { - var x Int32x16 - return x - } - mask := Mask32x16FromBits(0xffff >> (16 - l)) - return LoadMaskedInt32x16(paInt32x16(s), mask) -} - -// StoreSlicePart stores the 16 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 16 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Int32x16) StoreSlicePart(s []int32) { - l := len(s) - if l >= 16 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask32x16FromBits(0xffff >> (16 - l)) - x.StoreMasked(paInt32x16(s), mask) -} - -// LoadInt64x8SlicePart loads a Int64x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadInt64x8Slice. -func LoadInt64x8SlicePart(s []int64) Int64x8 { - l := len(s) - if l >= 8 { - return LoadInt64x8Slice(s) - } - if l == 0 { - var x Int64x8 - return x - } - mask := Mask64x8FromBits(0xff >> (8 - l)) - return LoadMaskedInt64x8(paInt64x8(s), mask) -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. -func (x Int64x8) StoreSlicePart(s []int64) { - l := len(s) - if l >= 8 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask64x8FromBits(0xff >> (8 - l)) - x.StoreMasked(paInt64x8(s), mask) -} - -// LoadUint8x64SlicePart loads a Uint8x64 from the slice s. -// If s has fewer than 64 elements, the remaining elements of the vector are filled with zeroes. -// If s has 64 or more elements, the function is equivalent to LoadUint8x64Slice. -func LoadUint8x64SlicePart(s []uint8) Uint8x64 { - l := len(s) - if l >= 64 { - return LoadUint8x64Slice(s) - } - if l == 0 { - var x Uint8x64 - return x - } - mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) - return LoadMaskedUint8x64(paUint8x64(s), mask) -} - -// StoreSlicePart stores the 64 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 64 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Uint8x64) StoreSlicePart(s []uint8) { - l := len(s) - if l >= 64 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) - x.StoreMasked(paUint8x64(s), mask) -} - -// LoadUint16x32SlicePart loads a Uint16x32 from the slice s. -// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes. -// If s has 32 or more elements, the function is equivalent to LoadUint16x32Slice. -func LoadUint16x32SlicePart(s []uint16) Uint16x32 { - l := len(s) - if l >= 32 { - return LoadUint16x32Slice(s) - } - if l == 0 { - var x Uint16x32 - return x - } - mask := Mask16x32FromBits(0xffffffff >> (32 - l)) - return LoadMaskedUint16x32(paUint16x32(s), mask) -} - -// StoreSlicePart stores the 32 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 32 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint16x32) StoreSlicePart(s []uint16) { - l := len(s) - if l >= 32 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask16x32FromBits(0xffffffff >> (32 - l)) - x.StoreMasked(paUint16x32(s), mask) -} - -// LoadUint32x16SlicePart loads a Uint32x16 from the slice s. -// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. -// If s has 16 or more elements, the function is equivalent to LoadUint32x16Slice. -func LoadUint32x16SlicePart(s []uint32) Uint32x16 { - l := len(s) - if l >= 16 { - return LoadUint32x16Slice(s) - } - if l == 0 { - var x Uint32x16 - return x - } - mask := Mask32x16FromBits(0xffff >> (16 - l)) - return LoadMaskedUint32x16(paUint32x16(s), mask) -} - -// StoreSlicePart stores the 16 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 16 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Uint32x16) StoreSlicePart(s []uint32) { - l := len(s) - if l >= 16 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask32x16FromBits(0xffff >> (16 - l)) - x.StoreMasked(paUint32x16(s), mask) -} - -// LoadUint64x8SlicePart loads a Uint64x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadUint64x8Slice. -func LoadUint64x8SlicePart(s []uint64) Uint64x8 { - l := len(s) - if l >= 8 { - return LoadUint64x8Slice(s) - } - if l == 0 { - var x Uint64x8 - return x - } - mask := Mask64x8FromBits(0xff >> (8 - l)) - return LoadMaskedUint64x8(paUint64x8(s), mask) -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint64x8) StoreSlicePart(s []uint64) { - l := len(s) - if l >= 8 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask64x8FromBits(0xff >> (8 - l)) - x.StoreMasked(paUint64x8(s), mask) -} - -// LoadFloat32x16SlicePart loads a Float32x16 from the slice s. -// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. -// If s has 16 or more elements, the function is equivalent to LoadFloat32x16Slice. -func LoadFloat32x16SlicePart(s []float32) Float32x16 { - l := len(s) - if l >= 16 { - return LoadFloat32x16Slice(s) - } - if l == 0 { - var x Float32x16 - return x - } - mask := Mask32x16FromBits(0xffff >> (16 - l)) - return LoadMaskedFloat32x16(paFloat32x16(s), mask) -} - -// StoreSlicePart stores the 16 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 16 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Float32x16) StoreSlicePart(s []float32) { - l := len(s) - if l >= 16 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask32x16FromBits(0xffff >> (16 - l)) - x.StoreMasked(paFloat32x16(s), mask) -} - -// LoadFloat64x8SlicePart loads a Float64x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadFloat64x8Slice. -func LoadFloat64x8SlicePart(s []float64) Float64x8 { - l := len(s) - if l >= 8 { - return LoadFloat64x8Slice(s) - } - if l == 0 { - var x Float64x8 - return x - } - mask := Mask64x8FromBits(0xff >> (8 - l)) - return LoadMaskedFloat64x8(paFloat64x8(s), mask) -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. -func (x Float64x8) StoreSlicePart(s []float64) { - l := len(s) - if l >= 8 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := Mask64x8FromBits(0xff >> (8 - l)) - x.StoreMasked(paFloat64x8(s), mask) -} - -// LoadInt32x4SlicePart loads a Int32x4 from the slice s. -// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. -// If s has 4 or more elements, the function is equivalent to LoadInt32x4Slice. -func LoadInt32x4SlicePart(s []int32) Int32x4 { - l := len(s) - if l >= 4 { - return LoadInt32x4Slice(s) - } - if l == 0 { - var x Int32x4 - return x - } - mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedInt32x4(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) -} - -// StoreSlicePart stores the 4 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 4 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Int32x4) StoreSlicePart(s []int32) { - l := len(s) - if l >= 4 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) -} - -// LoadInt64x2SlicePart loads a Int64x2 from the slice s. -// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. -// If s has 2 or more elements, the function is equivalent to LoadInt64x2Slice. -func LoadInt64x2SlicePart(s []int64) Int64x2 { - l := len(s) - if l >= 2 { - return LoadInt64x2Slice(s) - } - if l == 0 { - var x Int64x2 - return x - } - mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedInt64x2(paInt64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) -} - -// StoreSlicePart stores the 2 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 2 or more elements, the method is equivalent to x.StoreSlice. -func (x Int64x2) StoreSlicePart(s []int64) { - l := len(s) - if l >= 2 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paInt64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) -} - -// LoadUint32x4SlicePart loads a Uint32x4 from the slice s. -// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. -// If s has 4 or more elements, the function is equivalent to LoadUint32x4Slice. -func LoadUint32x4SlicePart(s []uint32) Uint32x4 { - l := len(s) - if l >= 4 { - return LoadUint32x4Slice(s) - } - if l == 0 { - var x Uint32x4 - return x - } - mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedUint32x4(paUint32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) -} - -// StoreSlicePart stores the 4 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 4 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Uint32x4) StoreSlicePart(s []uint32) { - l := len(s) - if l >= 4 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paUint32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) -} - -// LoadUint64x2SlicePart loads a Uint64x2 from the slice s. -// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. -// If s has 2 or more elements, the function is equivalent to LoadUint64x2Slice. -func LoadUint64x2SlicePart(s []uint64) Uint64x2 { - l := len(s) - if l >= 2 { - return LoadUint64x2Slice(s) - } - if l == 0 { - var x Uint64x2 - return x - } - mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedUint64x2(paUint64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) -} - -// StoreSlicePart stores the 2 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 2 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint64x2) StoreSlicePart(s []uint64) { - l := len(s) - if l >= 2 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paUint64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) -} - -// LoadFloat32x4SlicePart loads a Float32x4 from the slice s. -// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. -// If s has 4 or more elements, the function is equivalent to LoadFloat32x4Slice. -func LoadFloat32x4SlicePart(s []float32) Float32x4 { - l := len(s) - if l >= 4 { - return LoadFloat32x4Slice(s) - } - if l == 0 { - var x Float32x4 - return x - } - mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedFloat32x4(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) -} - -// StoreSlicePart stores the 4 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 4 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Float32x4) StoreSlicePart(s []float32) { - l := len(s) - if l >= 4 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) -} - -// LoadFloat64x2SlicePart loads a Float64x2 from the slice s. -// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. -// If s has 2 or more elements, the function is equivalent to LoadFloat64x2Slice. -func LoadFloat64x2SlicePart(s []float64) Float64x2 { - l := len(s) - if l >= 2 { - return LoadFloat64x2Slice(s) - } - if l == 0 { - var x Float64x2 - return x - } - mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedFloat64x2(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) -} - -// StoreSlicePart stores the 2 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 2 or more elements, the method is equivalent to x.StoreSlice. -func (x Float64x2) StoreSlicePart(s []float64) { - l := len(s) - if l >= 2 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) -} - -// LoadInt32x8SlicePart loads a Int32x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadInt32x8Slice. -func LoadInt32x8SlicePart(s []int32) Int32x8 { - l := len(s) - if l >= 8 { - return LoadInt32x8Slice(s) - } - if l == 0 { - var x Int32x8 - return x - } - mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedInt32x8(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Int32x8) StoreSlicePart(s []int32) { - l := len(s) - if l >= 8 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) -} - -// LoadInt64x4SlicePart loads a Int64x4 from the slice s. -// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. -// If s has 4 or more elements, the function is equivalent to LoadInt64x4Slice. -func LoadInt64x4SlicePart(s []int64) Int64x4 { - l := len(s) - if l >= 4 { - return LoadInt64x4Slice(s) - } - if l == 0 { - var x Int64x4 - return x - } - mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedInt64x4(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) -} - -// StoreSlicePart stores the 4 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 4 or more elements, the method is equivalent to x.StoreSlice. -func (x Int64x4) StoreSlicePart(s []int64) { - l := len(s) - if l >= 4 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) -} - -// LoadUint32x8SlicePart loads a Uint32x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadUint32x8Slice. -func LoadUint32x8SlicePart(s []uint32) Uint32x8 { - l := len(s) - if l >= 8 { - return LoadUint32x8Slice(s) - } - if l == 0 { - var x Uint32x8 - return x - } - mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedUint32x8(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Uint32x8) StoreSlicePart(s []uint32) { - l := len(s) - if l >= 8 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) -} - -// LoadUint64x4SlicePart loads a Uint64x4 from the slice s. -// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. -// If s has 4 or more elements, the function is equivalent to LoadUint64x4Slice. -func LoadUint64x4SlicePart(s []uint64) Uint64x4 { - l := len(s) - if l >= 4 { - return LoadUint64x4Slice(s) - } - if l == 0 { - var x Uint64x4 - return x - } - mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedUint64x4(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) -} - -// StoreSlicePart stores the 4 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 4 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint64x4) StoreSlicePart(s []uint64) { - l := len(s) - if l >= 4 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) -} - -// LoadFloat32x8SlicePart loads a Float32x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadFloat32x8Slice. -func LoadFloat32x8SlicePart(s []float32) Float32x8 { - l := len(s) - if l >= 8 { - return LoadFloat32x8Slice(s) - } - if l == 0 { - var x Float32x8 - return x - } - mask := vecMask32[len(vecMask32)/2-l:] - return LoadMaskedFloat32x8(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Float32x8) StoreSlicePart(s []float32) { - l := len(s) - if l >= 8 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask32[len(vecMask32)/2-l:] - x.StoreMasked(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) -} - -// LoadFloat64x4SlicePart loads a Float64x4 from the slice s. -// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. -// If s has 4 or more elements, the function is equivalent to LoadFloat64x4Slice. -func LoadFloat64x4SlicePart(s []float64) Float64x4 { - l := len(s) - if l >= 4 { - return LoadFloat64x4Slice(s) - } - if l == 0 { - var x Float64x4 - return x - } - mask := vecMask64[len(vecMask64)/2-l:] - return LoadMaskedFloat64x4(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) -} - -// StoreSlicePart stores the 4 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 4 or more elements, the method is equivalent to x.StoreSlice. -func (x Float64x4) StoreSlicePart(s []float64) { - l := len(s) - if l >= 4 { - x.StoreSlice(s) - return - } - if l == 0 { - return - } - mask := vecMask64[len(vecMask64)/2-l:] - x.StoreMasked(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) -} - -// LoadUint8x16SlicePart loads a Uint8x16 from the slice s. -// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. -// If s has 16 or more elements, the function is equivalent to LoadUint8x16Slice. -func LoadUint8x16SlicePart(s []uint8) Uint8x16 { - if len(s) == 0 { - var zero Uint8x16 - return zero - } - t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) - return LoadInt8x16SlicePart(t).AsUint8x16() -} - -// StoreSlicePart stores the 16 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 16 or more elements, the method is equivalent to x.StoreSlice. 
-func (x Uint8x16) StoreSlicePart(s []uint8) { - if len(s) == 0 { - return - } - t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) - x.AsInt8x16().StoreSlicePart(t) -} - -// LoadUint16x8SlicePart loads a Uint16x8 from the slice s. -// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. -// If s has 8 or more elements, the function is equivalent to LoadUint16x8Slice. -func LoadUint16x8SlicePart(s []uint16) Uint16x8 { - if len(s) == 0 { - var zero Uint16x8 - return zero - } - t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) - return LoadInt16x8SlicePart(t).AsUint16x8() -} - -// StoreSlicePart stores the 8 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 8 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint16x8) StoreSlicePart(s []uint16) { - if len(s) == 0 { - return - } - t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) - x.AsInt16x8().StoreSlicePart(t) -} - -// LoadUint8x32SlicePart loads a Uint8x32 from the slice s. -// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes. -// If s has 32 or more elements, the function is equivalent to LoadUint8x32Slice. -func LoadUint8x32SlicePart(s []uint8) Uint8x32 { - if len(s) == 0 { - var zero Uint8x32 - return zero - } - t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) - return LoadInt8x32SlicePart(t).AsUint8x32() -} - -// StoreSlicePart stores the 32 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 32 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint8x32) StoreSlicePart(s []uint8) { - if len(s) == 0 { - return - } - t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) - x.AsInt8x32().StoreSlicePart(t) -} - -// LoadUint16x16SlicePart loads a Uint16x16 from the slice s. -// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. 
-// If s has 16 or more elements, the function is equivalent to LoadUint16x16Slice. -func LoadUint16x16SlicePart(s []uint16) Uint16x16 { - if len(s) == 0 { - var zero Uint16x16 - return zero - } - t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) - return LoadInt16x16SlicePart(t).AsUint16x16() -} - -// StoreSlicePart stores the 16 elements of x into the slice s. -// It stores as many elements as will fit in s. -// If s has 16 or more elements, the method is equivalent to x.StoreSlice. -func (x Uint16x16) StoreSlicePart(s []uint16) { - if len(s) == 0 { - return - } - t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) - x.AsInt16x16().StoreSlicePart(t) -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int8x16) Masked(mask Mask8x16) Int8x16 { - im := mask.AsInt8x16() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int8x16) Merge(y Int8x16, mask Mask8x16) Int8x16 { - im := mask.AsInt8x16() - return y.blend(x, im) -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int16x8) Masked(mask Mask16x8) Int16x8 { - im := mask.AsInt16x8() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 { - im := mask.AsInt16x8().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsInt16x8() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int32x4) Masked(mask Mask32x4) Int32x4 { - im := mask.AsInt32x4() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 { - im := mask.AsInt32x4().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsInt32x4() -} - -// Masked returns x but with elements zeroed where mask is false. 
-func (x Int64x2) Masked(mask Mask64x2) Int64x2 { - im := mask.AsInt64x2() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 { - im := mask.AsInt64x2().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsInt64x2() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint8x16) Masked(mask Mask8x16) Uint8x16 { - im := mask.AsInt8x16() - return x.AsInt8x16().And(im).AsUint8x16() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 { - im := mask.AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsUint8x16() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint16x8) Masked(mask Mask16x8) Uint16x8 { - im := mask.AsInt16x8() - return x.AsInt16x8().And(im).AsUint16x8() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 { - im := mask.AsInt16x8().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsUint16x8() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint32x4) Masked(mask Mask32x4) Uint32x4 { - im := mask.AsInt32x4() - return x.AsInt32x4().And(im).AsUint32x4() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 { - im := mask.AsInt32x4().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsUint32x4() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint64x2) Masked(mask Mask64x2) Uint64x2 { - im := mask.AsInt64x2() - return x.AsInt64x2().And(im).AsUint64x2() -} - -// Merge returns x but with elements set to y where mask is false. 
-func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 { - im := mask.AsInt64x2().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsUint64x2() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Float32x4) Masked(mask Mask32x4) Float32x4 { - im := mask.AsInt32x4() - return x.AsInt32x4().And(im).AsFloat32x4() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 { - im := mask.AsInt32x4().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsFloat32x4() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Float64x2) Masked(mask Mask64x2) Float64x2 { - im := mask.AsInt64x2() - return x.AsInt64x2().And(im).AsFloat64x2() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 { - im := mask.AsInt64x2().AsInt8x16() - ix := x.AsInt8x16() - iy := y.AsInt8x16() - return iy.blend(ix, im).AsFloat64x2() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int8x32) Masked(mask Mask8x32) Int8x32 { - im := mask.AsInt8x32() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int8x32) Merge(y Int8x32, mask Mask8x32) Int8x32 { - im := mask.AsInt8x32() - return y.blend(x, im) -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int16x16) Masked(mask Mask16x16) Int16x16 { - im := mask.AsInt16x16() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 { - im := mask.AsInt16x16().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsInt16x16() -} - -// Masked returns x but with elements zeroed where mask is false. 
-func (x Int32x8) Masked(mask Mask32x8) Int32x8 { - im := mask.AsInt32x8() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 { - im := mask.AsInt32x8().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsInt32x8() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int64x4) Masked(mask Mask64x4) Int64x4 { - im := mask.AsInt64x4() - return im.And(x) -} - -// Merge returns x but with elements set to y where mask is false. -func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 { - im := mask.AsInt64x4().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsInt64x4() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint8x32) Masked(mask Mask8x32) Uint8x32 { - im := mask.AsInt8x32() - return x.AsInt8x32().And(im).AsUint8x32() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 { - im := mask.AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsUint8x32() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint16x16) Masked(mask Mask16x16) Uint16x16 { - im := mask.AsInt16x16() - return x.AsInt16x16().And(im).AsUint16x16() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 { - im := mask.AsInt16x16().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsUint16x16() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint32x8) Masked(mask Mask32x8) Uint32x8 { - im := mask.AsInt32x8() - return x.AsInt32x8().And(im).AsUint32x8() -} - -// Merge returns x but with elements set to y where mask is false. 
-func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 { - im := mask.AsInt32x8().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsUint32x8() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint64x4) Masked(mask Mask64x4) Uint64x4 { - im := mask.AsInt64x4() - return x.AsInt64x4().And(im).AsUint64x4() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 { - im := mask.AsInt64x4().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsUint64x4() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Float32x8) Masked(mask Mask32x8) Float32x8 { - im := mask.AsInt32x8() - return x.AsInt32x8().And(im).AsFloat32x8() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 { - im := mask.AsInt32x8().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsFloat32x8() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Float64x4) Masked(mask Mask64x4) Float64x4 { - im := mask.AsInt64x4() - return x.AsInt64x4().And(im).AsFloat64x4() -} - -// Merge returns x but with elements set to y where mask is false. -func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 { - im := mask.AsInt64x4().AsInt8x32() - ix := x.AsInt8x32() - iy := y.AsInt8x32() - return iy.blend(ix, im).AsFloat64x4() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int8x64) Masked(mask Mask8x64) Int8x64 { - im := mask.AsInt8x64() - return im.And(x) -} - -// Merge returns x but with elements set to y where m is false. -func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 { - return y.blendMasked(x, mask) -} - -// Masked returns x but with elements zeroed where mask is false. 
-func (x Int16x32) Masked(mask Mask16x32) Int16x32 { - im := mask.AsInt16x32() - return im.And(x) -} - -// Merge returns x but with elements set to y where m is false. -func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 { - return y.blendMasked(x, mask) -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int32x16) Masked(mask Mask32x16) Int32x16 { - im := mask.AsInt32x16() - return im.And(x) -} - -// Merge returns x but with elements set to y where m is false. -func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 { - return y.blendMasked(x, mask) -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Int64x8) Masked(mask Mask64x8) Int64x8 { - im := mask.AsInt64x8() - return im.And(x) -} - -// Merge returns x but with elements set to y where m is false. -func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 { - return y.blendMasked(x, mask) -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 { - im := mask.AsInt8x64() - return x.AsInt8x64().And(im).AsUint8x64() -} - -// Merge returns x but with elements set to y where m is false. -func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { - ix := x.AsInt8x64() - iy := y.AsInt8x64() - return iy.blendMasked(ix, mask).AsUint8x64() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 { - im := mask.AsInt16x32() - return x.AsInt16x32().And(im).AsUint16x32() -} - -// Merge returns x but with elements set to y where m is false. -func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { - ix := x.AsInt16x32() - iy := y.AsInt16x32() - return iy.blendMasked(ix, mask).AsUint16x32() -} - -// Masked returns x but with elements zeroed where mask is false. 
-func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 { - im := mask.AsInt32x16() - return x.AsInt32x16().And(im).AsUint32x16() -} - -// Merge returns x but with elements set to y where m is false. -func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { - ix := x.AsInt32x16() - iy := y.AsInt32x16() - return iy.blendMasked(ix, mask).AsUint32x16() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 { - im := mask.AsInt64x8() - return x.AsInt64x8().And(im).AsUint64x8() -} - -// Merge returns x but with elements set to y where m is false. -func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { - ix := x.AsInt64x8() - iy := y.AsInt64x8() - return iy.blendMasked(ix, mask).AsUint64x8() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Float32x16) Masked(mask Mask32x16) Float32x16 { - im := mask.AsInt32x16() - return x.AsInt32x16().And(im).AsFloat32x16() -} - -// Merge returns x but with elements set to y where m is false. -func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { - ix := x.AsInt32x16() - iy := y.AsInt32x16() - return iy.blendMasked(ix, mask).AsFloat32x16() -} - -// Masked returns x but with elements zeroed where mask is false. -func (x Float64x8) Masked(mask Mask64x8) Float64x8 { - im := mask.AsInt64x8() - return x.AsInt64x8().And(im).AsFloat64x8() -} - -// Merge returns x but with elements set to y where m is false. 
-func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 { - ix := x.AsInt64x8() - iy := y.AsInt64x8() - return iy.blendMasked(ix, mask).AsFloat64x8() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Int8x16) Less(y Int8x16) Mask8x16 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 { - ones := x.Equal(x).AsInt8x16() - return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Int8x16) LessEqual(y Int8x16) Mask8x16 { - ones := x.Equal(x).AsInt8x16() - return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Int8x16) NotEqual(y Int8x16) Mask8x16 { - ones := x.Equal(x).AsInt8x16() - return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Int16x8) Less(y Int16x8) Mask16x8 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 { - ones := x.Equal(x).AsInt16x8() - return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Int16x8) LessEqual(y Int16x8) Mask16x8 { - ones := x.Equal(x).AsInt16x8() - return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Int16x8) NotEqual(y Int16x8) Mask16x8 { - ones := x.Equal(x).AsInt16x8() - return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8() -} - -// Less 
returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Int32x4) Less(y Int32x4) Mask32x4 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 { - ones := x.Equal(x).AsInt32x4() - return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Int32x4) LessEqual(y Int32x4) Mask32x4 { - ones := x.Equal(x).AsInt32x4() - return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Int32x4) NotEqual(y Int32x4) Mask32x4 { - ones := x.Equal(x).AsInt32x4() - return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Int64x2) Less(y Int64x2) Mask64x2 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 { - ones := x.Equal(x).AsInt64x2() - return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Int64x2) LessEqual(y Int64x2) Mask64x2 { - ones := x.Equal(x).AsInt64x2() - return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Int64x2) NotEqual(y Int64x2) Mask64x2 { - ones := x.Equal(x).AsInt64x2() - return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Int8x32) Less(y Int8x32) Mask8x32 { - return y.Greater(x) -} - -// GreaterEqual 
returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 { - ones := x.Equal(x).AsInt8x32() - return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Int8x32) LessEqual(y Int8x32) Mask8x32 { - ones := x.Equal(x).AsInt8x32() - return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Int8x32) NotEqual(y Int8x32) Mask8x32 { - ones := x.Equal(x).AsInt8x32() - return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Int16x16) Less(y Int16x16) Mask16x16 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 { - ones := x.Equal(x).AsInt16x16() - return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Int16x16) LessEqual(y Int16x16) Mask16x16 { - ones := x.Equal(x).AsInt16x16() - return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Int16x16) NotEqual(y Int16x16) Mask16x16 { - ones := x.Equal(x).AsInt16x16() - return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Int32x8) Less(y Int32x8) Mask32x8 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 { - ones 
:= x.Equal(x).AsInt32x8() - return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Int32x8) LessEqual(y Int32x8) Mask32x8 { - ones := x.Equal(x).AsInt32x8() - return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Int32x8) NotEqual(y Int32x8) Mask32x8 { - ones := x.Equal(x).AsInt32x8() - return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8() -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Int64x4) Less(y Int64x4) Mask64x4 { - return y.Greater(x) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 { - ones := x.Equal(x).AsInt64x4() - return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Int64x4) LessEqual(y Int64x4) Mask64x4 { - ones := x.Equal(x).AsInt64x4() - return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Int64x4) NotEqual(y Int64x4) Mask64x4 { - ones := x.Equal(x).AsInt64x4() - return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x16) Greater(y Uint8x16) Mask8x16 { - a, b := x.AsInt8x16(), y.AsInt8x16() - signs := BroadcastInt8x16(-1 << (8 - 1)) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x16) Less(y Uint8x16) Mask8x16 { - a, b := x.AsInt8x16(), y.AsInt8x16() - signs := BroadcastInt8x16(-1 << (8 - 
1)) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { - a, b := x.AsInt8x16(), y.AsInt8x16() - ones := x.Equal(x).AsInt8x16() - signs := BroadcastInt8x16(-1 << (8 - 1)) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { - a, b := x.AsInt8x16(), y.AsInt8x16() - ones := x.Equal(x).AsInt8x16() - signs := BroadcastInt8x16(-1 << (8 - 1)) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 { - a, b := x.AsInt8x16(), y.AsInt8x16() - ones := x.Equal(x).AsInt8x16() - return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX -func (x Uint16x8) Greater(y Uint16x8) Mask16x8 { - a, b := x.AsInt16x8(), y.AsInt16x8() - ones := x.Equal(x).AsInt16x8() - signs := ones.ShiftAllLeft(16 - 1) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Uint16x8) Less(y Uint16x8) Mask16x8 { - a, b := x.AsInt16x8(), y.AsInt16x8() - ones := x.Equal(x).AsInt16x8() - signs := ones.ShiftAllLeft(16 - 1) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { - a, b := x.AsInt16x8(), y.AsInt16x8() - ones := x.Equal(x).AsInt16x8() - signs := ones.ShiftAllLeft(16 - 1) - return 
b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { - a, b := x.AsInt16x8(), y.AsInt16x8() - ones := x.Equal(x).AsInt16x8() - signs := ones.ShiftAllLeft(16 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 { - a, b := x.AsInt16x8(), y.AsInt16x8() - ones := x.Equal(x).AsInt16x8() - return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX -func (x Uint32x4) Greater(y Uint32x4) Mask32x4 { - a, b := x.AsInt32x4(), y.AsInt32x4() - ones := x.Equal(x).AsInt32x4() - signs := ones.ShiftAllLeft(32 - 1) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Uint32x4) Less(y Uint32x4) Mask32x4 { - a, b := x.AsInt32x4(), y.AsInt32x4() - ones := x.Equal(x).AsInt32x4() - signs := ones.ShiftAllLeft(32 - 1) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { - a, b := x.AsInt32x4(), y.AsInt32x4() - ones := x.Equal(x).AsInt32x4() - signs := ones.ShiftAllLeft(32 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { - a, b := x.AsInt32x4(), y.AsInt32x4() - ones := x.Equal(x).AsInt32x4() - signs := ones.ShiftAllLeft(32 - 1) - return 
a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 { - a, b := x.AsInt32x4(), y.AsInt32x4() - ones := x.Equal(x).AsInt32x4() - return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX -func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { - a, b := x.AsInt64x2(), y.AsInt64x2() - ones := x.Equal(x).AsInt64x2() - signs := ones.ShiftAllLeft(64 - 1) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX -func (x Uint64x2) Less(y Uint64x2) Mask64x2 { - a, b := x.AsInt64x2(), y.AsInt64x2() - ones := x.Equal(x).AsInt64x2() - signs := ones.ShiftAllLeft(64 - 1) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX -func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { - a, b := x.AsInt64x2(), y.AsInt64x2() - ones := x.Equal(x).AsInt64x2() - signs := ones.ShiftAllLeft(64 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX -func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { - a, b := x.AsInt64x2(), y.AsInt64x2() - ones := x.Equal(x).AsInt64x2() - signs := ones.ShiftAllLeft(64 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX -func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 { - a, b := x.AsInt64x2(), y.AsInt64x2() - ones := x.Equal(x).AsInt64x2() - return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2() -} - -// Greater returns a mask whose elements 
indicate whether x > y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x32) Greater(y Uint8x32) Mask8x32 { - a, b := x.AsInt8x32(), y.AsInt8x32() - signs := BroadcastInt8x32(-1 << (8 - 1)) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x32) Less(y Uint8x32) Mask8x32 { - a, b := x.AsInt8x32(), y.AsInt8x32() - signs := BroadcastInt8x32(-1 << (8 - 1)) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { - a, b := x.AsInt8x32(), y.AsInt8x32() - ones := x.Equal(x).AsInt8x32() - signs := BroadcastInt8x32(-1 << (8 - 1)) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { - a, b := x.AsInt8x32(), y.AsInt8x32() - ones := x.Equal(x).AsInt8x32() - signs := BroadcastInt8x32(-1 << (8 - 1)) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 { - a, b := x.AsInt8x32(), y.AsInt8x32() - ones := x.Equal(x).AsInt8x32() - return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX2 -func (x Uint16x16) Greater(y Uint16x16) Mask16x16 { - a, b := x.AsInt16x16(), y.AsInt16x16() - ones := x.Equal(x).AsInt16x16() - signs := ones.ShiftAllLeft(16 - 1) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Uint16x16) Less(y Uint16x16) Mask16x16 { - a, b := x.AsInt16x16(), 
y.AsInt16x16() - ones := x.Equal(x).AsInt16x16() - signs := ones.ShiftAllLeft(16 - 1) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { - a, b := x.AsInt16x16(), y.AsInt16x16() - ones := x.Equal(x).AsInt16x16() - signs := ones.ShiftAllLeft(16 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { - a, b := x.AsInt16x16(), y.AsInt16x16() - ones := x.Equal(x).AsInt16x16() - signs := ones.ShiftAllLeft(16 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 { - a, b := x.AsInt16x16(), y.AsInt16x16() - ones := x.Equal(x).AsInt16x16() - return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX2 -func (x Uint32x8) Greater(y Uint32x8) Mask32x8 { - a, b := x.AsInt32x8(), y.AsInt32x8() - ones := x.Equal(x).AsInt32x8() - signs := ones.ShiftAllLeft(32 - 1) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Uint32x8) Less(y Uint32x8) Mask32x8 { - a, b := x.AsInt32x8(), y.AsInt32x8() - ones := x.Equal(x).AsInt32x8() - signs := ones.ShiftAllLeft(32 - 1) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { - a, b := x.AsInt32x8(), y.AsInt32x8() - ones := x.Equal(x).AsInt32x8() - signs := 
ones.ShiftAllLeft(32 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { - a, b := x.AsInt32x8(), y.AsInt32x8() - ones := x.Equal(x).AsInt32x8() - signs := ones.ShiftAllLeft(32 - 1) - return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 { - a, b := x.AsInt32x8(), y.AsInt32x8() - ones := x.Equal(x).AsInt32x8() - return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8() -} - -// Greater returns a mask whose elements indicate whether x > y -// -// Emulated, CPU Feature AVX2 -func (x Uint64x4) Greater(y Uint64x4) Mask64x4 { - a, b := x.AsInt64x4(), y.AsInt64x4() - ones := x.Equal(x).AsInt64x4() - signs := ones.ShiftAllLeft(64 - 1) - return a.Xor(signs).Greater(b.Xor(signs)) -} - -// Less returns a mask whose elements indicate whether x < y -// -// Emulated, CPU Feature AVX2 -func (x Uint64x4) Less(y Uint64x4) Mask64x4 { - a, b := x.AsInt64x4(), y.AsInt64x4() - ones := x.Equal(x).AsInt64x4() - signs := ones.ShiftAllLeft(64 - 1) - return b.Xor(signs).Greater(a.Xor(signs)) -} - -// GreaterEqual returns a mask whose elements indicate whether x >= y -// -// Emulated, CPU Feature AVX2 -func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { - a, b := x.AsInt64x4(), y.AsInt64x4() - ones := x.Equal(x).AsInt64x4() - signs := ones.ShiftAllLeft(64 - 1) - return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4() -} - -// LessEqual returns a mask whose elements indicate whether x <= y -// -// Emulated, CPU Feature AVX2 -func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { - a, b := x.AsInt64x4(), y.AsInt64x4() - ones := x.Equal(x).AsInt64x4() - signs := ones.ShiftAllLeft(64 - 1) - return 
a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4() -} - -// NotEqual returns a mask whose elements indicate whether x != y -// -// Emulated, CPU Feature AVX2 -func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 { - a, b := x.AsInt64x4(), y.AsInt64x4() - ones := x.Equal(x).AsInt64x4() - return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4() -} - -// BroadcastInt8x16 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt8x16(x int8) Int8x16 { - var z Int8x16 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastInt16x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt16x8(x int16) Int16x8 { - var z Int16x8 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastInt32x4 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt32x4(x int32) Int32x4 { - var z Int32x4 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastInt64x2 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt64x2(x int64) Int64x2 { - var z Int64x2 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastUint8x16 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastUint8x16(x uint8) Uint8x16 { - var z Uint8x16 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastUint16x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastUint16x8(x uint16) Uint16x8 { - var z Uint16x8 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastUint32x4 returns a vector with the input -// x assigned to all elements of the output. 
-// -// Emulated, CPU Feature AVX2 -func BroadcastUint32x4(x uint32) Uint32x4 { - var z Uint32x4 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastUint64x2 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastUint64x2(x uint64) Uint64x2 { - var z Uint64x2 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastFloat32x4 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastFloat32x4(x float32) Float32x4 { - var z Float32x4 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastFloat64x2 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastFloat64x2(x float64) Float64x2 { - var z Float64x2 - return z.SetElem(0, x).Broadcast128() -} - -// BroadcastInt8x32 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt8x32(x int8) Int8x32 { - var z Int8x16 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastInt16x16 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt16x16(x int16) Int16x16 { - var z Int16x8 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastInt32x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt32x8(x int32) Int32x8 { - var z Int32x4 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastInt64x4 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastInt64x4(x int64) Int64x4 { - var z Int64x2 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastUint8x32 returns a vector with the input -// x assigned to all elements of the output. 
-// -// Emulated, CPU Feature AVX2 -func BroadcastUint8x32(x uint8) Uint8x32 { - var z Uint8x16 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastUint16x16 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastUint16x16(x uint16) Uint16x16 { - var z Uint16x8 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastUint32x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastUint32x8(x uint32) Uint32x8 { - var z Uint32x4 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastUint64x4 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastUint64x4(x uint64) Uint64x4 { - var z Uint64x2 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastFloat32x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastFloat32x8(x float32) Float32x8 { - var z Float32x4 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastFloat64x4 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX2 -func BroadcastFloat64x4(x float64) Float64x4 { - var z Float64x2 - return z.SetElem(0, x).Broadcast256() -} - -// BroadcastInt8x64 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512BW -func BroadcastInt8x64(x int8) Int8x64 { - var z Int8x16 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastInt16x32 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512BW -func BroadcastInt16x32(x int16) Int16x32 { - var z Int16x8 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastInt32x16 returns a vector with the input -// x assigned to all elements of the output. 
-// -// Emulated, CPU Feature AVX512F -func BroadcastInt32x16(x int32) Int32x16 { - var z Int32x4 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastInt64x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512F -func BroadcastInt64x8(x int64) Int64x8 { - var z Int64x2 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastUint8x64 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512BW -func BroadcastUint8x64(x uint8) Uint8x64 { - var z Uint8x16 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastUint16x32 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512BW -func BroadcastUint16x32(x uint16) Uint16x32 { - var z Uint16x8 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastUint32x16 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512F -func BroadcastUint32x16(x uint32) Uint32x16 { - var z Uint32x4 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastUint64x8 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512F -func BroadcastUint64x8(x uint64) Uint64x8 { - var z Uint64x2 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastFloat32x16 returns a vector with the input -// x assigned to all elements of the output. -// -// Emulated, CPU Feature AVX512F -func BroadcastFloat32x16(x float32) Float32x16 { - var z Float32x4 - return z.SetElem(0, x).Broadcast512() -} - -// BroadcastFloat64x8 returns a vector with the input -// x assigned to all elements of the output. 
-// -// Emulated, CPU Feature AVX512F -func BroadcastFloat64x8(x float64) Float64x8 { - var z Float64x2 - return z.SetElem(0, x).Broadcast512() -} diff --git a/src/simd/slice_gen_amd64.go b/src/simd/slice_gen_amd64.go new file mode 100644 index 00000000000..45e95be9bf9 --- /dev/null +++ b/src/simd/slice_gen_amd64.go @@ -0,0 +1,1103 @@ +// Code generated by 'go run genfiles.go'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd + +import "unsafe" + +// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s +func LoadInt8x16Slice(s []int8) Int8x16 { + return LoadInt8x16((*[16]int8)(s)) +} + +// StoreSlice stores x into a slice of at least 16 int8s +func (x Int8x16) StoreSlice(s []int8) { + x.Store((*[16]int8)(s)) +} + +// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s +func LoadInt16x8Slice(s []int16) Int16x8 { + return LoadInt16x8((*[8]int16)(s)) +} + +// StoreSlice stores x into a slice of at least 8 int16s +func (x Int16x8) StoreSlice(s []int16) { + x.Store((*[8]int16)(s)) +} + +// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s +func LoadInt32x4Slice(s []int32) Int32x4 { + return LoadInt32x4((*[4]int32)(s)) +} + +// StoreSlice stores x into a slice of at least 4 int32s +func (x Int32x4) StoreSlice(s []int32) { + x.Store((*[4]int32)(s)) +} + +// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s +func LoadInt64x2Slice(s []int64) Int64x2 { + return LoadInt64x2((*[2]int64)(s)) +} + +// StoreSlice stores x into a slice of at least 2 int64s +func (x Int64x2) StoreSlice(s []int64) { + x.Store((*[2]int64)(s)) +} + +// LoadUint8x16Slice loads a Uint8x16 from a slice of at least 16 uint8s +func LoadUint8x16Slice(s []uint8) Uint8x16 { + return LoadUint8x16((*[16]uint8)(s)) +} + +// StoreSlice stores x into a slice of at least 16 uint8s +func (x Uint8x16) StoreSlice(s []uint8) { + x.Store((*[16]uint8)(s)) +} + +// LoadUint16x8Slice loads a Uint16x8 from a slice of at least 8 uint16s +func 
LoadUint16x8Slice(s []uint16) Uint16x8 { + return LoadUint16x8((*[8]uint16)(s)) +} + +// StoreSlice stores x into a slice of at least 8 uint16s +func (x Uint16x8) StoreSlice(s []uint16) { + x.Store((*[8]uint16)(s)) +} + +// LoadUint32x4Slice loads a Uint32x4 from a slice of at least 4 uint32s +func LoadUint32x4Slice(s []uint32) Uint32x4 { + return LoadUint32x4((*[4]uint32)(s)) +} + +// StoreSlice stores x into a slice of at least 4 uint32s +func (x Uint32x4) StoreSlice(s []uint32) { + x.Store((*[4]uint32)(s)) +} + +// LoadUint64x2Slice loads a Uint64x2 from a slice of at least 2 uint64s +func LoadUint64x2Slice(s []uint64) Uint64x2 { + return LoadUint64x2((*[2]uint64)(s)) +} + +// StoreSlice stores x into a slice of at least 2 uint64s +func (x Uint64x2) StoreSlice(s []uint64) { + x.Store((*[2]uint64)(s)) +} + +// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s +func LoadFloat32x4Slice(s []float32) Float32x4 { + return LoadFloat32x4((*[4]float32)(s)) +} + +// StoreSlice stores x into a slice of at least 4 float32s +func (x Float32x4) StoreSlice(s []float32) { + x.Store((*[4]float32)(s)) +} + +// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s +func LoadFloat64x2Slice(s []float64) Float64x2 { + return LoadFloat64x2((*[2]float64)(s)) +} + +// StoreSlice stores x into a slice of at least 2 float64s +func (x Float64x2) StoreSlice(s []float64) { + x.Store((*[2]float64)(s)) +} + +// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s +func LoadInt8x32Slice(s []int8) Int8x32 { + return LoadInt8x32((*[32]int8)(s)) +} + +// StoreSlice stores x into a slice of at least 32 int8s +func (x Int8x32) StoreSlice(s []int8) { + x.Store((*[32]int8)(s)) +} + +// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s +func LoadInt16x16Slice(s []int16) Int16x16 { + return LoadInt16x16((*[16]int16)(s)) +} + +// StoreSlice stores x into a slice of at least 16 int16s +func (x Int16x16) StoreSlice(s []int16) { 
+ x.Store((*[16]int16)(s)) +} + +// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s +func LoadInt32x8Slice(s []int32) Int32x8 { + return LoadInt32x8((*[8]int32)(s)) +} + +// StoreSlice stores x into a slice of at least 8 int32s +func (x Int32x8) StoreSlice(s []int32) { + x.Store((*[8]int32)(s)) +} + +// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s +func LoadInt64x4Slice(s []int64) Int64x4 { + return LoadInt64x4((*[4]int64)(s)) +} + +// StoreSlice stores x into a slice of at least 4 int64s +func (x Int64x4) StoreSlice(s []int64) { + x.Store((*[4]int64)(s)) +} + +// LoadUint8x32Slice loads a Uint8x32 from a slice of at least 32 uint8s +func LoadUint8x32Slice(s []uint8) Uint8x32 { + return LoadUint8x32((*[32]uint8)(s)) +} + +// StoreSlice stores x into a slice of at least 32 uint8s +func (x Uint8x32) StoreSlice(s []uint8) { + x.Store((*[32]uint8)(s)) +} + +// LoadUint16x16Slice loads a Uint16x16 from a slice of at least 16 uint16s +func LoadUint16x16Slice(s []uint16) Uint16x16 { + return LoadUint16x16((*[16]uint16)(s)) +} + +// StoreSlice stores x into a slice of at least 16 uint16s +func (x Uint16x16) StoreSlice(s []uint16) { + x.Store((*[16]uint16)(s)) +} + +// LoadUint32x8Slice loads a Uint32x8 from a slice of at least 8 uint32s +func LoadUint32x8Slice(s []uint32) Uint32x8 { + return LoadUint32x8((*[8]uint32)(s)) +} + +// StoreSlice stores x into a slice of at least 8 uint32s +func (x Uint32x8) StoreSlice(s []uint32) { + x.Store((*[8]uint32)(s)) +} + +// LoadUint64x4Slice loads a Uint64x4 from a slice of at least 4 uint64s +func LoadUint64x4Slice(s []uint64) Uint64x4 { + return LoadUint64x4((*[4]uint64)(s)) +} + +// StoreSlice stores x into a slice of at least 4 uint64s +func (x Uint64x4) StoreSlice(s []uint64) { + x.Store((*[4]uint64)(s)) +} + +// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s +func LoadFloat32x8Slice(s []float32) Float32x8 { + return LoadFloat32x8((*[8]float32)(s)) +} + +// 
StoreSlice stores x into a slice of at least 8 float32s +func (x Float32x8) StoreSlice(s []float32) { + x.Store((*[8]float32)(s)) +} + +// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s +func LoadFloat64x4Slice(s []float64) Float64x4 { + return LoadFloat64x4((*[4]float64)(s)) +} + +// StoreSlice stores x into a slice of at least 4 float64s +func (x Float64x4) StoreSlice(s []float64) { + x.Store((*[4]float64)(s)) +} + +// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s +func LoadInt8x64Slice(s []int8) Int8x64 { + return LoadInt8x64((*[64]int8)(s)) +} + +// StoreSlice stores x into a slice of at least 64 int8s +func (x Int8x64) StoreSlice(s []int8) { + x.Store((*[64]int8)(s)) +} + +// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s +func LoadInt16x32Slice(s []int16) Int16x32 { + return LoadInt16x32((*[32]int16)(s)) +} + +// StoreSlice stores x into a slice of at least 32 int16s +func (x Int16x32) StoreSlice(s []int16) { + x.Store((*[32]int16)(s)) +} + +// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s +func LoadInt32x16Slice(s []int32) Int32x16 { + return LoadInt32x16((*[16]int32)(s)) +} + +// StoreSlice stores x into a slice of at least 16 int32s +func (x Int32x16) StoreSlice(s []int32) { + x.Store((*[16]int32)(s)) +} + +// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s +func LoadInt64x8Slice(s []int64) Int64x8 { + return LoadInt64x8((*[8]int64)(s)) +} + +// StoreSlice stores x into a slice of at least 8 int64s +func (x Int64x8) StoreSlice(s []int64) { + x.Store((*[8]int64)(s)) +} + +// LoadUint8x64Slice loads a Uint8x64 from a slice of at least 64 uint8s +func LoadUint8x64Slice(s []uint8) Uint8x64 { + return LoadUint8x64((*[64]uint8)(s)) +} + +// StoreSlice stores x into a slice of at least 64 uint8s +func (x Uint8x64) StoreSlice(s []uint8) { + x.Store((*[64]uint8)(s)) +} + +// LoadUint16x32Slice loads a Uint16x32 from a slice of at least 32 uint16s 
+func LoadUint16x32Slice(s []uint16) Uint16x32 { + return LoadUint16x32((*[32]uint16)(s)) +} + +// StoreSlice stores x into a slice of at least 32 uint16s +func (x Uint16x32) StoreSlice(s []uint16) { + x.Store((*[32]uint16)(s)) +} + +// LoadUint32x16Slice loads a Uint32x16 from a slice of at least 16 uint32s +func LoadUint32x16Slice(s []uint32) Uint32x16 { + return LoadUint32x16((*[16]uint32)(s)) +} + +// StoreSlice stores x into a slice of at least 16 uint32s +func (x Uint32x16) StoreSlice(s []uint32) { + x.Store((*[16]uint32)(s)) +} + +// LoadUint64x8Slice loads a Uint64x8 from a slice of at least 8 uint64s +func LoadUint64x8Slice(s []uint64) Uint64x8 { + return LoadUint64x8((*[8]uint64)(s)) +} + +// StoreSlice stores x into a slice of at least 8 uint64s +func (x Uint64x8) StoreSlice(s []uint64) { + x.Store((*[8]uint64)(s)) +} + +// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s +func LoadFloat32x16Slice(s []float32) Float32x16 { + return LoadFloat32x16((*[16]float32)(s)) +} + +// StoreSlice stores x into a slice of at least 16 float32s +func (x Float32x16) StoreSlice(s []float32) { + x.Store((*[16]float32)(s)) +} + +// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s +func LoadFloat64x8Slice(s []float64) Float64x8 { + return LoadFloat64x8((*[8]float64)(s)) +} + +// StoreSlice stores x into a slice of at least 8 float64s +func (x Float64x8) StoreSlice(s []float64) { + x.Store((*[8]float64)(s)) +} + +// LoadInt8x64SlicePart loads an Int8x64 from the slice s. +// If s has fewer than 64 elements, the remaining elements of the vector are filled with zeroes. +// If s has 64 or more elements, the function is equivalent to LoadInt8x64Slice. 
+func LoadInt8x64SlicePart(s []int8) Int8x64 { + l := len(s) + if l >= 64 { + return LoadInt8x64Slice(s) + } + if l == 0 { + var x Int8x64 + return x + } + mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) + return LoadMaskedInt8x64(paInt8x64(s), mask) +} + +// StoreSlicePart stores the 64 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 64 or more elements, the method is equivalent to x.StoreSlice. +func (x Int8x64) StoreSlicePart(s []int8) { + l := len(s) + if l >= 64 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) + x.StoreMasked(paInt8x64(s), mask) +} + +// LoadInt16x32SlicePart loads an Int16x32 from the slice s. +// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes. +// If s has 32 or more elements, the function is equivalent to LoadInt16x32Slice. +func LoadInt16x32SlicePart(s []int16) Int16x32 { + l := len(s) + if l >= 32 { + return LoadInt16x32Slice(s) + } + if l == 0 { + var x Int16x32 + return x + } + mask := Mask16x32FromBits(0xffffffff >> (32 - l)) + return LoadMaskedInt16x32(paInt16x32(s), mask) +} + +// StoreSlicePart stores the 32 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 32 or more elements, the method is equivalent to x.StoreSlice. +func (x Int16x32) StoreSlicePart(s []int16) { + l := len(s) + if l >= 32 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask16x32FromBits(0xffffffff >> (32 - l)) + x.StoreMasked(paInt16x32(s), mask) +} + +// LoadInt32x16SlicePart loads an Int32x16 from the slice s. +// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. +// If s has 16 or more elements, the function is equivalent to LoadInt32x16Slice. 
+func LoadInt32x16SlicePart(s []int32) Int32x16 { + l := len(s) + if l >= 16 { + return LoadInt32x16Slice(s) + } + if l == 0 { + var x Int32x16 + return x + } + mask := Mask32x16FromBits(0xffff >> (16 - l)) + return LoadMaskedInt32x16(paInt32x16(s), mask) +} + +// StoreSlicePart stores the 16 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 16 or more elements, the method is equivalent to x.StoreSlice. +func (x Int32x16) StoreSlicePart(s []int32) { + l := len(s) + if l >= 16 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask32x16FromBits(0xffff >> (16 - l)) + x.StoreMasked(paInt32x16(s), mask) +} + +// LoadInt64x8SlicePart loads an Int64x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadInt64x8Slice. +func LoadInt64x8SlicePart(s []int64) Int64x8 { + l := len(s) + if l >= 8 { + return LoadInt64x8Slice(s) + } + if l == 0 { + var x Int64x8 + return x + } + mask := Mask64x8FromBits(0xff >> (8 - l)) + return LoadMaskedInt64x8(paInt64x8(s), mask) +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Int64x8) StoreSlicePart(s []int64) { + l := len(s) + if l >= 8 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask64x8FromBits(0xff >> (8 - l)) + x.StoreMasked(paInt64x8(s), mask) +} + +// LoadUint8x64SlicePart loads a Uint8x64 from the slice s. +// If s has fewer than 64 elements, the remaining elements of the vector are filled with zeroes. +// If s has 64 or more elements, the function is equivalent to LoadUint8x64Slice. 
+func LoadUint8x64SlicePart(s []uint8) Uint8x64 { + l := len(s) + if l >= 64 { + return LoadUint8x64Slice(s) + } + if l == 0 { + var x Uint8x64 + return x + } + mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) + return LoadMaskedUint8x64(paUint8x64(s), mask) +} + +// StoreSlicePart stores the 64 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 64 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint8x64) StoreSlicePart(s []uint8) { + l := len(s) + if l >= 64 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) + x.StoreMasked(paUint8x64(s), mask) +} + +// LoadUint16x32SlicePart loads a Uint16x32 from the slice s. +// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes. +// If s has 32 or more elements, the function is equivalent to LoadUint16x32Slice. +func LoadUint16x32SlicePart(s []uint16) Uint16x32 { + l := len(s) + if l >= 32 { + return LoadUint16x32Slice(s) + } + if l == 0 { + var x Uint16x32 + return x + } + mask := Mask16x32FromBits(0xffffffff >> (32 - l)) + return LoadMaskedUint16x32(paUint16x32(s), mask) +} + +// StoreSlicePart stores the 32 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 32 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint16x32) StoreSlicePart(s []uint16) { + l := len(s) + if l >= 32 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask16x32FromBits(0xffffffff >> (32 - l)) + x.StoreMasked(paUint16x32(s), mask) +} + +// LoadUint32x16SlicePart loads a Uint32x16 from the slice s. +// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. +// If s has 16 or more elements, the function is equivalent to LoadUint32x16Slice. 
+func LoadUint32x16SlicePart(s []uint32) Uint32x16 { + l := len(s) + if l >= 16 { + return LoadUint32x16Slice(s) + } + if l == 0 { + var x Uint32x16 + return x + } + mask := Mask32x16FromBits(0xffff >> (16 - l)) + return LoadMaskedUint32x16(paUint32x16(s), mask) +} + +// StoreSlicePart stores the 16 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 16 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint32x16) StoreSlicePart(s []uint32) { + l := len(s) + if l >= 16 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask32x16FromBits(0xffff >> (16 - l)) + x.StoreMasked(paUint32x16(s), mask) +} + +// LoadUint64x8SlicePart loads a Uint64x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadUint64x8Slice. +func LoadUint64x8SlicePart(s []uint64) Uint64x8 { + l := len(s) + if l >= 8 { + return LoadUint64x8Slice(s) + } + if l == 0 { + var x Uint64x8 + return x + } + mask := Mask64x8FromBits(0xff >> (8 - l)) + return LoadMaskedUint64x8(paUint64x8(s), mask) +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint64x8) StoreSlicePart(s []uint64) { + l := len(s) + if l >= 8 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask64x8FromBits(0xff >> (8 - l)) + x.StoreMasked(paUint64x8(s), mask) +} + +// LoadFloat32x16SlicePart loads a Float32x16 from the slice s. +// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. +// If s has 16 or more elements, the function is equivalent to LoadFloat32x16Slice. 
+func LoadFloat32x16SlicePart(s []float32) Float32x16 { + l := len(s) + if l >= 16 { + return LoadFloat32x16Slice(s) + } + if l == 0 { + var x Float32x16 + return x + } + mask := Mask32x16FromBits(0xffff >> (16 - l)) + return LoadMaskedFloat32x16(paFloat32x16(s), mask) +} + +// StoreSlicePart stores the 16 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 16 or more elements, the method is equivalent to x.StoreSlice. +func (x Float32x16) StoreSlicePart(s []float32) { + l := len(s) + if l >= 16 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask32x16FromBits(0xffff >> (16 - l)) + x.StoreMasked(paFloat32x16(s), mask) +} + +// LoadFloat64x8SlicePart loads a Float64x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadFloat64x8Slice. +func LoadFloat64x8SlicePart(s []float64) Float64x8 { + l := len(s) + if l >= 8 { + return LoadFloat64x8Slice(s) + } + if l == 0 { + var x Float64x8 + return x + } + mask := Mask64x8FromBits(0xff >> (8 - l)) + return LoadMaskedFloat64x8(paFloat64x8(s), mask) +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Float64x8) StoreSlicePart(s []float64) { + l := len(s) + if l >= 8 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := Mask64x8FromBits(0xff >> (8 - l)) + x.StoreMasked(paFloat64x8(s), mask) +} + +// LoadInt32x4SlicePart loads a Int32x4 from the slice s. +// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. +// If s has 4 or more elements, the function is equivalent to LoadInt32x4Slice. 
+func LoadInt32x4SlicePart(s []int32) Int32x4 { + l := len(s) + if l >= 4 { + return LoadInt32x4Slice(s) + } + if l == 0 { + var x Int32x4 + return x + } + mask := vecMask32[len(vecMask32)/2-l:] + return LoadMaskedInt32x4(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) +} + +// StoreSlicePart stores the 4 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 4 or more elements, the method is equivalent to x.StoreSlice. +func (x Int32x4) StoreSlicePart(s []int32) { + l := len(s) + if l >= 4 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask32[len(vecMask32)/2-l:] + x.StoreMasked(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) +} + +// LoadInt64x2SlicePart loads a Int64x2 from the slice s. +// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. +// If s has 2 or more elements, the function is equivalent to LoadInt64x2Slice. +func LoadInt64x2SlicePart(s []int64) Int64x2 { + l := len(s) + if l >= 2 { + return LoadInt64x2Slice(s) + } + if l == 0 { + var x Int64x2 + return x + } + mask := vecMask64[len(vecMask64)/2-l:] + return LoadMaskedInt64x2(paInt64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) +} + +// StoreSlicePart stores the 2 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 2 or more elements, the method is equivalent to x.StoreSlice. +func (x Int64x2) StoreSlicePart(s []int64) { + l := len(s) + if l >= 2 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask64[len(vecMask64)/2-l:] + x.StoreMasked(paInt64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) +} + +// LoadUint32x4SlicePart loads a Uint32x4 from the slice s. +// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. +// If s has 4 or more elements, the function is equivalent to LoadUint32x4Slice. 
+func LoadUint32x4SlicePart(s []uint32) Uint32x4 { + l := len(s) + if l >= 4 { + return LoadUint32x4Slice(s) + } + if l == 0 { + var x Uint32x4 + return x + } + mask := vecMask32[len(vecMask32)/2-l:] + return LoadMaskedUint32x4(paUint32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) +} + +// StoreSlicePart stores the 4 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 4 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint32x4) StoreSlicePart(s []uint32) { + l := len(s) + if l >= 4 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask32[len(vecMask32)/2-l:] + x.StoreMasked(paUint32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) +} + +// LoadUint64x2SlicePart loads a Uint64x2 from the slice s. +// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. +// If s has 2 or more elements, the function is equivalent to LoadUint64x2Slice. +func LoadUint64x2SlicePart(s []uint64) Uint64x2 { + l := len(s) + if l >= 2 { + return LoadUint64x2Slice(s) + } + if l == 0 { + var x Uint64x2 + return x + } + mask := vecMask64[len(vecMask64)/2-l:] + return LoadMaskedUint64x2(paUint64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) +} + +// StoreSlicePart stores the 2 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 2 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint64x2) StoreSlicePart(s []uint64) { + l := len(s) + if l >= 2 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask64[len(vecMask64)/2-l:] + x.StoreMasked(paUint64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) +} + +// LoadFloat32x4SlicePart loads a Float32x4 from the slice s. +// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. +// If s has 4 or more elements, the function is equivalent to LoadFloat32x4Slice. 
+func LoadFloat32x4SlicePart(s []float32) Float32x4 { + l := len(s) + if l >= 4 { + return LoadFloat32x4Slice(s) + } + if l == 0 { + var x Float32x4 + return x + } + mask := vecMask32[len(vecMask32)/2-l:] + return LoadMaskedFloat32x4(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) +} + +// StoreSlicePart stores the 4 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 4 or more elements, the method is equivalent to x.StoreSlice. +func (x Float32x4) StoreSlicePart(s []float32) { + l := len(s) + if l >= 4 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask32[len(vecMask32)/2-l:] + x.StoreMasked(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4()) +} + +// LoadFloat64x2SlicePart loads a Float64x2 from the slice s. +// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. +// If s has 2 or more elements, the function is equivalent to LoadFloat64x2Slice. +func LoadFloat64x2SlicePart(s []float64) Float64x2 { + l := len(s) + if l >= 2 { + return LoadFloat64x2Slice(s) + } + if l == 0 { + var x Float64x2 + return x + } + mask := vecMask64[len(vecMask64)/2-l:] + return LoadMaskedFloat64x2(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) +} + +// StoreSlicePart stores the 2 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 2 or more elements, the method is equivalent to x.StoreSlice. +func (x Float64x2) StoreSlicePart(s []float64) { + l := len(s) + if l >= 2 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask64[len(vecMask64)/2-l:] + x.StoreMasked(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2()) +} + +// LoadInt32x8SlicePart loads a Int32x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadInt32x8Slice. 
+func LoadInt32x8SlicePart(s []int32) Int32x8 { + l := len(s) + if l >= 8 { + return LoadInt32x8Slice(s) + } + if l == 0 { + var x Int32x8 + return x + } + mask := vecMask32[len(vecMask32)/2-l:] + return LoadMaskedInt32x8(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Int32x8) StoreSlicePart(s []int32) { + l := len(s) + if l >= 8 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask32[len(vecMask32)/2-l:] + x.StoreMasked(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) +} + +// LoadInt64x4SlicePart loads a Int64x4 from the slice s. +// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. +// If s has 4 or more elements, the function is equivalent to LoadInt64x4Slice. +func LoadInt64x4SlicePart(s []int64) Int64x4 { + l := len(s) + if l >= 4 { + return LoadInt64x4Slice(s) + } + if l == 0 { + var x Int64x4 + return x + } + mask := vecMask64[len(vecMask64)/2-l:] + return LoadMaskedInt64x4(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) +} + +// StoreSlicePart stores the 4 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 4 or more elements, the method is equivalent to x.StoreSlice. +func (x Int64x4) StoreSlicePart(s []int64) { + l := len(s) + if l >= 4 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask64[len(vecMask64)/2-l:] + x.StoreMasked(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) +} + +// LoadUint32x8SlicePart loads a Uint32x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadUint32x8Slice. 
+func LoadUint32x8SlicePart(s []uint32) Uint32x8 { + l := len(s) + if l >= 8 { + return LoadUint32x8Slice(s) + } + if l == 0 { + var x Uint32x8 + return x + } + mask := vecMask32[len(vecMask32)/2-l:] + return LoadMaskedUint32x8(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint32x8) StoreSlicePart(s []uint32) { + l := len(s) + if l >= 8 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask32[len(vecMask32)/2-l:] + x.StoreMasked(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) +} + +// LoadUint64x4SlicePart loads a Uint64x4 from the slice s. +// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. +// If s has 4 or more elements, the function is equivalent to LoadUint64x4Slice. +func LoadUint64x4SlicePart(s []uint64) Uint64x4 { + l := len(s) + if l >= 4 { + return LoadUint64x4Slice(s) + } + if l == 0 { + var x Uint64x4 + return x + } + mask := vecMask64[len(vecMask64)/2-l:] + return LoadMaskedUint64x4(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) +} + +// StoreSlicePart stores the 4 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 4 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint64x4) StoreSlicePart(s []uint64) { + l := len(s) + if l >= 4 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask64[len(vecMask64)/2-l:] + x.StoreMasked(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) +} + +// LoadFloat32x8SlicePart loads a Float32x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadFloat32x8Slice. 
+func LoadFloat32x8SlicePart(s []float32) Float32x8 { + l := len(s) + if l >= 8 { + return LoadFloat32x8Slice(s) + } + if l == 0 { + var x Float32x8 + return x + } + mask := vecMask32[len(vecMask32)/2-l:] + return LoadMaskedFloat32x8(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Float32x8) StoreSlicePart(s []float32) { + l := len(s) + if l >= 8 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask32[len(vecMask32)/2-l:] + x.StoreMasked(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8()) +} + +// LoadFloat64x4SlicePart loads a Float64x4 from the slice s. +// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. +// If s has 4 or more elements, the function is equivalent to LoadFloat64x4Slice. +func LoadFloat64x4SlicePart(s []float64) Float64x4 { + l := len(s) + if l >= 4 { + return LoadFloat64x4Slice(s) + } + if l == 0 { + var x Float64x4 + return x + } + mask := vecMask64[len(vecMask64)/2-l:] + return LoadMaskedFloat64x4(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) +} + +// StoreSlicePart stores the 4 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 4 or more elements, the method is equivalent to x.StoreSlice. +func (x Float64x4) StoreSlicePart(s []float64) { + l := len(s) + if l >= 4 { + x.StoreSlice(s) + return + } + if l == 0 { + return + } + mask := vecMask64[len(vecMask64)/2-l:] + x.StoreMasked(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4()) +} + +// LoadUint8x16SlicePart loads a Uint8x16 from the slice s. +// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. +// If s has 16 or more elements, the function is equivalent to LoadUint8x16Slice. 
+func LoadUint8x16SlicePart(s []uint8) Uint8x16 { + if len(s) == 0 { + var zero Uint8x16 + return zero + } + t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) + return LoadInt8x16SlicePart(t).AsUint8x16() +} + +// StoreSlicePart stores the 16 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 16 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint8x16) StoreSlicePart(s []uint8) { + if len(s) == 0 { + return + } + t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) + x.AsInt8x16().StoreSlicePart(t) +} + +// LoadUint16x8SlicePart loads a Uint16x8 from the slice s. +// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes. +// If s has 8 or more elements, the function is equivalent to LoadUint16x8Slice. +func LoadUint16x8SlicePart(s []uint16) Uint16x8 { + if len(s) == 0 { + var zero Uint16x8 + return zero + } + t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) + return LoadInt16x8SlicePart(t).AsUint16x8() +} + +// StoreSlicePart stores the 8 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 8 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint16x8) StoreSlicePart(s []uint16) { + if len(s) == 0 { + return + } + t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) + x.AsInt16x8().StoreSlicePart(t) +} + +// LoadUint8x32SlicePart loads a Uint8x32 from the slice s. +// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes. +// If s has 32 or more elements, the function is equivalent to LoadUint8x32Slice. +func LoadUint8x32SlicePart(s []uint8) Uint8x32 { + if len(s) == 0 { + var zero Uint8x32 + return zero + } + t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) + return LoadInt8x32SlicePart(t).AsUint8x32() +} + +// StoreSlicePart stores the 32 elements of x into the slice s. +// It stores as many elements as will fit in s. 
+// If s has 32 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint8x32) StoreSlicePart(s []uint8) { + if len(s) == 0 { + return + } + t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s)) + x.AsInt8x32().StoreSlicePart(t) +} + +// LoadUint16x16SlicePart loads a Uint16x16 from the slice s. +// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes. +// If s has 16 or more elements, the function is equivalent to LoadUint16x16Slice. +func LoadUint16x16SlicePart(s []uint16) Uint16x16 { + if len(s) == 0 { + var zero Uint16x16 + return zero + } + t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) + return LoadInt16x16SlicePart(t).AsUint16x16() +} + +// StoreSlicePart stores the 16 elements of x into the slice s. +// It stores as many elements as will fit in s. +// If s has 16 or more elements, the method is equivalent to x.StoreSlice. +func (x Uint16x16) StoreSlicePart(s []uint16) { + if len(s) == 0 { + return + } + t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) + x.AsInt16x16().StoreSlicePart(t) +}