Mirror of https://github.com/golang/go.git
[dev.simd] cmd/compile: Generated code for AVX2 SIMD masked load/store

This adds to the change in the earlier dev.simd CL. Generated by
arch/internal/simdgen CL 689276. Also includes one test for "it at
least works once".

Change-Id: I44a268cfc3bea06c5522ac2cfa04fe13a833e1dd
Reviewed-on: https://go-review.googlesource.com/c/go/+/689335
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Parent: a0b87a7478
Commit: acc1492b7d
5 changed files with 279 additions and 10 deletions
@@ -2132,6 +2132,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...
     addF(simdPackage, "Uint64x4.Store", simdStore(), sys.AMD64)
     addF(simdPackage, "LoadUint64x8", simdLoad(), sys.AMD64)
     addF(simdPackage, "Uint64x8.Store", simdStore(), sys.AMD64)
+    addF(simdPackage, "LoadMaskedFloat32x4", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+    addF(simdPackage, "Float32x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+    addF(simdPackage, "LoadMaskedFloat32x8", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+    addF(simdPackage, "Float32x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+    addF(simdPackage, "LoadMaskedFloat64x2", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+    addF(simdPackage, "Float64x2.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+    addF(simdPackage, "LoadMaskedFloat64x4", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+    addF(simdPackage, "Float64x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+    addF(simdPackage, "LoadMaskedInt32x4", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+    addF(simdPackage, "Int32x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+    addF(simdPackage, "LoadMaskedInt32x8", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+    addF(simdPackage, "Int32x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+    addF(simdPackage, "LoadMaskedInt64x2", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+    addF(simdPackage, "Int64x2.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+    addF(simdPackage, "LoadMaskedInt64x4", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+    addF(simdPackage, "Int64x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+    addF(simdPackage, "LoadMaskedUint32x4", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+    addF(simdPackage, "Uint32x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+    addF(simdPackage, "LoadMaskedUint32x8", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+    addF(simdPackage, "Uint32x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+    addF(simdPackage, "LoadMaskedUint64x2", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+    addF(simdPackage, "Uint64x2.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+    addF(simdPackage, "LoadMaskedUint64x4", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+    addF(simdPackage, "Uint64x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
     addF(simdPackage, "Mask8x16.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
     addF(simdPackage, "Int8x16.AsMask8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
     addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
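Each addF line above binds a simd package function or method to an intrinsicBuilder, so the call compiles directly to an SSA op instead of a function call. The simdMaskedLoad/simdMaskedStore helpers themselves come from the earlier dev.simd CL and are not shown in this diff; the sketch below is only a guess at their shape, by analogy with the unmasked builders (names and details are assumptions, not the actual implementation):

    // Hypothetical sketch only; the real simdMaskedLoad helper is defined
    // in the earlier CL and may differ.
    func simdMaskedLoadSketch(op ssa.Op) intrinsicBuilder {
        return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
            // args[0] is the pointer to the array, args[1] the mask vector.
            // A masked load reads memory, so it also takes the current
            // memory state as an argument.
            return s.newValue3(op, n.Type(), args[0], args[1], s.mem())
        }
    }
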
@@ -206,16 +206,6 @@ func TestPairDotProdAccumulate(t *testing.T) {
     }
 }
 
-// checkInt8Slices ensures that b and a are equal, to the end of b.
-// also serves to use the slices, to prevent accidental optimization.
-func checkInt8Slices(t *testing.T, a, b []int8) {
-    for i := range b {
-        if a[i] != b[i] {
-            t.Errorf("a and b differ at index %d, a=%d, b=%d", i, a[i], b[i])
-        }
-    }
-}
-
 func TestSlicesInt8(t *testing.T) {
     a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
         17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
@@ -37,6 +37,10 @@ func int64atP32(p *int32) *int64 {
     return (*int64)(unsafe.Pointer(p))
 }
 
+func int32atP64(p *int64) *int32 {
+    return (*int32)(unsafe.Pointer(p))
+}
+
 /* unsigned versions of integer slice part loads */
 
 // LoadUint8x16SlicePart loads a Uint8x16 from the slice s.
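int32atP64 mirrors the existing int64atP32: both only reinterpret memory through unsafe.Pointer, so that the int64 mask table added below can be consumed as int32 lanes. A standalone sketch of the same reinterpretation, independent of this package (an all-ones int64 reads back as two int32 values of -1, regardless of endianness):

    package main

    import (
        "fmt"
        "unsafe"
    )

    func main() {
        x := int64(-1) // all 64 bits set
        p := (*int32)(unsafe.Pointer(&x))
        words := unsafe.Slice(p, 2) // view the same 8 bytes as two int32s
        fmt.Println(words)          // [-1 -1]: each 32-bit half is all ones
    }
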
@@ -385,3 +389,70 @@ func (x Int16x8) StoreSlicePart(s []int16) {
     }
     return
 }
+
+var vecMask64 = [16]int64{
+    -1, -1, -1, -1,
+    -1, -1, -1, -1,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+}
+
+// paInt32x4 is an unchecked cast from a slice to a
+// pointer-to-array type, for use in a masked
+// load/store. In practice, the slice will be too
+// short, so this has to be unsafe, and its only
+// use must be with an instruction with masked
+// load/store effect (including faults).
+func paInt32x4(s []int32) *[4]int32 {
+    return (*[4]int32)(unsafe.Pointer(&s[0]))
+}
+
+/* 32 and 64-bit slice-part loads for AVX2 (128 and 256 bit) */
+
+func LoadInt32x4SlicePart(s []int32) Int32x4 {
+    l := len(s)
+    if l >= 4 {
+        return LoadInt32x4Slice(s)
+    }
+    if l == 0 {
+        var x Int32x4
+        return x
+    }
+    p := int32atP64(&vecMask64[0])
+    mask := unsafe.Slice(p, 32)[16-l:]
+    return LoadMaskedInt32x4(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4())
+}
+
+func (x Int32x4) StoreSlicePart(s []int32) {
+    l := len(s)
+    if l >= 4 {
+        x.StoreSlice(s)
+        return
+    }
+    if l == 0 {
+        return
+    }
+    p := int32atP64(&vecMask64[0])
+    mask := unsafe.Slice(p, 32)[16-l:]
+    x.StoreMasked(paInt32x4(s), LoadInt32x4Slice(mask).AsMask32x4())
+}
+
+// func LoadInt32x8SlicePart(s []int32) Int32x8 {
+// }
+
+// func LoadInt64x2SlicePart(s []int64) Int64x2 {
+// }
+
+// func LoadInt64x4SlicePart(s []int64) Int64x4 {
+// }
+
+// func (x Int32x8) StoreSlicePart(s []int32) {
+// }
+
+// func (x Int64x4) StoreSlicePart(s []int64) {
+// }
+
+// func (x Int64x8) StoreSlicePart(s []int64) {
+// }
+
+// Handle float32, float64, uint32, and uint64 with ugly casts.
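The mask trick: viewed through int32atP64, vecMask64 is 32 int32 words, 16 of -1 followed by 16 of 0. Slicing that window at offset 16-l puts exactly l enabled lanes (sign bit set) at the front, which is what the masked load/store consumes. A minimal usage sketch of the new slice-part functions (hypothetical program; assumes the experimental simd package from this branch, built with GOEXPERIMENT=simd):

    package main

    import (
        "fmt"
        "simd"
    )

    func main() {
        s := []int32{10, 20, 30}          // shorter than the 4 lanes of an Int32x4
        v := simd.LoadInt32x4SlicePart(s) // masked load: lanes 0-2 from s, lane 3 zero

        dst := make([]int32, len(s))
        v.StoreSlicePart(dst) // masked store: writes exactly len(dst) lanes
        fmt.Println(dst)      // [10 20 30]
    }
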
@@ -177,3 +177,43 @@ func TestSlicesPartStoreUint8x32(t *testing.T) {
         }
     }
 }
+
+func TestSlicePartInt32(t *testing.T) {
+    L := 4
+    c := []int32{1, 2, 3, 4, 5, -1, -1, -1, -1}
+    a := c[:L+1]
+    for i := range a {
+        // Test the load first.
+        // e is a partial slice.
+        e := a[i:]
+        v := simd.LoadInt32x4SlicePart(e)
+        // d contains what the loaded vector ought to contain.
+        d := make([]int32, L)
+        for j := 0; j < len(e) && j < len(d); j++ {
+            d[j] = e[j]
+        }
+
+        b := make([]int32, L)
+        v.StoreSlice(b)
+        // Test the load.
+        checkSlices(t, d, b)
+
+        // Test the store.
+        f := make([]int32, L+1)
+        for i := range f {
+            f[i] = 99
+        }
+
+        v.StoreSlicePart(f[:len(e)])
+        if len(e) < len(b) {
+            checkSlices(t, f, b[:len(e)])
+        } else {
+            checkSlices(t, f, b)
+        }
+        for i := len(e); i < len(f); i++ {
+            if f[i] != 99 {
+                t.Errorf("StoreSlicePart altered f[%d], expected 99, saw %d", i, f[i])
+            }
+        }
+    }
+}
@@ -28,6 +28,18 @@ func LoadFloat32x4(y *[4]float32) Float32x4
 //go:noescape
 func (x Float32x4) Store(y *[4]float32)
+
+// LoadMaskedFloat32x4 loads a Float32x4 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedFloat32x4(y *[4]float32, mask Mask32x4) Float32x4
+
+// StoreMasked stores a Float32x4 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Float32x4) StoreMasked(y *[4]float32, mask Mask32x4)
 
 // Float64x2 is a 128-bit SIMD vector of 2 float64
 type Float64x2 struct {
     float64x2 v128
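These declarations (and the analogous ones for the other element types below) are the public surface of the intrinsics registered earlier. A minimal usage sketch (hypothetical program; assumes the experimental simd package and GOEXPERIMENT=simd):

    package main

    import (
        "fmt"
        "simd"
    )

    func main() {
        // On AVX2 a lane participates when its mask lane's sign bit is set,
        // so -1 enables a lane and 0 disables it.
        bits := [4]int32{-1, -1, 0, 0}
        m := simd.LoadInt32x4(&bits).AsMask32x4()

        src := [4]float32{1.5, 2.5, 3.5, 4.5}
        v := simd.LoadMaskedFloat32x4(&src, m) // disabled lanes load as zero

        dst := [4]float32{9, 9, 9, 9}
        v.StoreMasked(&dst, m) // disabled lanes of dst are left untouched
        fmt.Println(dst)       // [1.5 2.5 9 9]
    }
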
@@ -47,6 +59,18 @@ func LoadFloat64x2(y *[2]float64) Float64x2
 //go:noescape
 func (x Float64x2) Store(y *[2]float64)
+
+// LoadMaskedFloat64x2 loads a Float64x2 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedFloat64x2(y *[2]float64, mask Mask64x2) Float64x2
+
+// StoreMasked stores a Float64x2 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Float64x2) StoreMasked(y *[2]float64, mask Mask64x2)
 
 // Int8x16 is a 128-bit SIMD vector of 16 int8
 type Int8x16 struct {
     int8x16 v128
@@ -104,6 +128,18 @@ func LoadInt32x4(y *[4]int32) Int32x4
 //go:noescape
 func (x Int32x4) Store(y *[4]int32)
+
+// LoadMaskedInt32x4 loads an Int32x4 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedInt32x4(y *[4]int32, mask Mask32x4) Int32x4
+
+// StoreMasked stores an Int32x4 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Int32x4) StoreMasked(y *[4]int32, mask Mask32x4)
 
 // Int64x2 is a 128-bit SIMD vector of 2 int64
 type Int64x2 struct {
     int64x2 v128
@@ -123,6 +159,18 @@ func LoadInt64x2(y *[2]int64) Int64x2
 //go:noescape
 func (x Int64x2) Store(y *[2]int64)
+
+// LoadMaskedInt64x2 loads an Int64x2 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedInt64x2(y *[2]int64, mask Mask64x2) Int64x2
+
+// StoreMasked stores an Int64x2 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Int64x2) StoreMasked(y *[2]int64, mask Mask64x2)
 
 // Uint8x16 is a 128-bit SIMD vector of 16 uint8
 type Uint8x16 struct {
     uint8x16 v128
@@ -180,6 +228,18 @@ func LoadUint32x4(y *[4]uint32) Uint32x4
 //go:noescape
 func (x Uint32x4) Store(y *[4]uint32)
+
+// LoadMaskedUint32x4 loads a Uint32x4 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedUint32x4(y *[4]uint32, mask Mask32x4) Uint32x4
+
+// StoreMasked stores a Uint32x4 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Uint32x4) StoreMasked(y *[4]uint32, mask Mask32x4)
 
 // Uint64x2 is a 128-bit SIMD vector of 2 uint64
 type Uint64x2 struct {
     uint64x2 v128
@@ -199,6 +259,18 @@ func LoadUint64x2(y *[2]uint64) Uint64x2
 //go:noescape
 func (x Uint64x2) Store(y *[2]uint64)
+
+// LoadMaskedUint64x2 loads a Uint64x2 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedUint64x2(y *[2]uint64, mask Mask64x2) Uint64x2
+
+// StoreMasked stores a Uint64x2 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Uint64x2) StoreMasked(y *[2]uint64, mask Mask64x2)
 
 // Mask8x16 is a 128-bit SIMD vector of 16 int8
 type Mask8x16 struct {
     int8x16 v128
@@ -311,6 +383,18 @@ func LoadFloat32x8(y *[8]float32) Float32x8
 //go:noescape
 func (x Float32x8) Store(y *[8]float32)
+
+// LoadMaskedFloat32x8 loads a Float32x8 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedFloat32x8(y *[8]float32, mask Mask32x8) Float32x8
+
+// StoreMasked stores a Float32x8 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Float32x8) StoreMasked(y *[8]float32, mask Mask32x8)
 
 // Float64x4 is a 256-bit SIMD vector of 4 float64
 type Float64x4 struct {
     float64x4 v256
@@ -330,6 +414,18 @@ func LoadFloat64x4(y *[4]float64) Float64x4
 //go:noescape
 func (x Float64x4) Store(y *[4]float64)
+
+// LoadMaskedFloat64x4 loads a Float64x4 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedFloat64x4(y *[4]float64, mask Mask64x4) Float64x4
+
+// StoreMasked stores a Float64x4 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Float64x4) StoreMasked(y *[4]float64, mask Mask64x4)
 
 // Int8x32 is a 256-bit SIMD vector of 32 int8
 type Int8x32 struct {
     int8x32 v256
@@ -387,6 +483,18 @@ func LoadInt32x8(y *[8]int32) Int32x8
 //go:noescape
 func (x Int32x8) Store(y *[8]int32)
+
+// LoadMaskedInt32x8 loads an Int32x8 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedInt32x8(y *[8]int32, mask Mask32x8) Int32x8
+
+// StoreMasked stores an Int32x8 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Int32x8) StoreMasked(y *[8]int32, mask Mask32x8)
 
 // Int64x4 is a 256-bit SIMD vector of 4 int64
 type Int64x4 struct {
     int64x4 v256
@@ -406,6 +514,18 @@ func LoadInt64x4(y *[4]int64) Int64x4
 //go:noescape
 func (x Int64x4) Store(y *[4]int64)
+
+// LoadMaskedInt64x4 loads an Int64x4 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedInt64x4(y *[4]int64, mask Mask64x4) Int64x4
+
+// StoreMasked stores an Int64x4 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Int64x4) StoreMasked(y *[4]int64, mask Mask64x4)
 
 // Uint8x32 is a 256-bit SIMD vector of 32 uint8
 type Uint8x32 struct {
     uint8x32 v256
@@ -463,6 +583,18 @@ func LoadUint32x8(y *[8]uint32) Uint32x8
 //go:noescape
 func (x Uint32x8) Store(y *[8]uint32)
+
+// LoadMaskedUint32x8 loads a Uint32x8 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedUint32x8(y *[8]uint32, mask Mask32x8) Uint32x8
+
+// StoreMasked stores a Uint32x8 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Uint32x8) StoreMasked(y *[8]uint32, mask Mask32x8)
 
 // Uint64x4 is a 256-bit SIMD vector of 4 uint64
 type Uint64x4 struct {
     uint64x4 v256
@@ -482,6 +614,18 @@ func LoadUint64x4(y *[4]uint64) Uint64x4
 //go:noescape
 func (x Uint64x4) Store(y *[4]uint64)
+
+// LoadMaskedUint64x4 loads a Uint64x4 from an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func LoadMaskedUint64x4(y *[4]uint64, mask Mask64x4) Uint64x4
+
+// StoreMasked stores a Uint64x4 to an array,
+// at those elements enabled by mask
+//
+//go:noescape
+func (x Uint64x4) StoreMasked(y *[4]uint64, mask Mask64x4)
 
 // Mask8x32 is a 256-bit SIMD vector of 32 int8
 type Mask8x32 struct {
     int8x32 v256