[dev.simd] simd: add functions+methods to load-from/store-to slices

Includes the generator (which is short and uncomplicated)
and a few tests.

Change-Id: Icba9de042935a59bee34b278306c241b7651f5b4
Reviewed-on: https://go-review.googlesource.com/c/go/+/679258
Auto-Submit: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
David Chase 2025-06-05 15:09:19 -04:00 committed by Gopher Robot
parent 8ecbd59ebb
commit 2eaa5a0703
6 changed files with 498 additions and 6 deletions

View file

@ -5,7 +5,6 @@
package comment
import (
"internal/buildcfg"
"internal/diff"
"internal/testenv"
"slices"
@ -25,10 +24,6 @@ func TestStd(t *testing.T) {
list = append(list, pkg)
}
}
// TODO remove this when simd is the default, for now fake its existence
if !buildcfg.Experiment.SIMD {
list = append(list, "simd")
}
slices.Sort(list)
have := strings.Join(stdPkgs, "\n") + "\n"

View file

@ -4,7 +4,7 @@
//go:build goexperiment.simd
// the build condition == if the experiment is not on, cmd/api TestCheck will see this and complain
// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain
// see also go/doc/comment, where "simd" is inserted to the package list if the experiment is not on.
package simd

117
src/simd/genslice.go Normal file
View file

@ -0,0 +1,117 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
// this generates all the code to load and store simd
// vectors to/from slices.
import (
"bytes"
"flag"
"fmt"
"go/format"
"io"
"os"
"strings"
)
// slice writes to out the Go source of the two slice helpers for one vector
// type: a Load<V>Slice function and a StoreSlice method, where <V> is the
// vector type name built from e, w and c. For e="uint", w=8, c=16 the text
// written (before it is run through gofmt) is:
//
//	// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s
//	func LoadUint8x16Slice(s []uint8) Uint8x16 {
//	return LoadUint8x16((*[16]uint8)(s))
//	}
//
//	// StoreSlice stores x into a slice of at least 16 uint8s
//	func (x Uint8x16) StoreSlice(s []uint8) {
//	x.Store((*[16]uint8)(s))
//	}
//
// e is the element type name without its width ("int", "uint", "float"),
// w is the element width and c is the number of elements per vector.
// Nothing is written when w*c is smaller than 128 or larger than 512.
func slice(e string, w, c int, out io.Writer) {
	const loadTemplate = `
// Load%sSlice loads %s %s from a slice of at least %d %ss
func Load%sSlice(s []%s) %s {
return Load%s((*[%d]%s)(s))
}
`
	const storeTemplate = `
// StoreSlice stores x into a slice of at least %d %ss
func (x %s) StoreSlice(s []%s) {
x.Store((*[%d]%s)(s))
}
`

	if total := w * c; total < 128 || total > 512 {
		return
	}

	// vec is the exported vector type name (e.g. Uint8x16), elem the
	// element type name (e.g. uint8).
	vec := fmt.Sprintf("%s%dx%d", strings.ToUpper(e[:1])+e[1:], w, c)
	elem := fmt.Sprintf("%s%d", e, w)

	// Pick the indefinite article that goes in front of the vector type
	// name in the generated doc comment.
	article := "a"
	switch e[0] {
	case 'a', 'e', 'i', 'o', 'u':
		article = "an"
	}

	fmt.Fprintf(out, loadTemplate,
		vec, article, vec, c, elem,
		vec, elem, vec,
		vec, c, elem)
	fmt.Fprintf(out, storeTemplate,
		c, elem,
		vec, elem,
		c, elem)
}
// prologue writes the fixed head of the generated file to out: the
// "Code generated ... DO NOT EDIT" marker, the goexperiment.simd build
// constraint with its explanatory comments, and the package clause.
// s is the command line recorded inside the "Code generated" marker.
func prologue(s string, out io.Writer) {
	const header = `// Code generated by '%s'; DO NOT EDIT.
//go:build goexperiment.simd
// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain
// see also go/doc/comment, where "simd" is inserted to the package list of the experiment is not on.
package simd
`
	fmt.Fprintf(out, header, s)
}
// main generates the slice load/store helpers for every vector shape and
// writes the gofmt-formatted result to the file named by -o, or to standard
// output when -o is not given.
//
// The source is generated and formatted completely in memory before the
// output file is touched, so a generation or formatting failure never leaves
// a truncated file behind. Every failure, including a failed write, is
// reported on standard error and ends the program with exit status 1.
func main() {
	filename := flag.String("o", "", "write generated code to this file")
	flag.Parse()

	out := new(bytes.Buffer)
	prologue("go run genslice.go -o slice_amd64.go", out)

	vecs := []int{128, 256, 512}
	ints := []int{8, 16, 32, 64}
	floats := []int{32, 64}
	for _, v := range vecs {
		for _, w := range ints {
			c := v / w
			slice("int", w, c, out)
			slice("uint", w, c, out)
		}
		for _, w := range floats {
			c := v / w
			slice("float", w, c, out)
		}
	}

	b, err := format.Source(out.Bytes())
	if err != nil {
		fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code, %v\n", err)
		os.Exit(1)
	}

	if *filename == "" {
		if _, err := os.Stdout.Write(b); err != nil {
			fmt.Fprintf(os.Stderr, "Could not write the generated code, %v\n", err)
			os.Exit(1)
		}
		return
	}

	// os.WriteFile creates or truncates the file, writes b and closes it,
	// reporting any error from those steps. The 0o666 permission matches
	// what os.Create would have used.
	if err := os.WriteFile(*filename, b, 0o666); err != nil {
		fmt.Fprintf(os.Stderr, "Could not write the output file for the generated code, %v\n", err)
		os.Exit(1)
	}
}

9
src/simd/no_tag.go Normal file
View file

@ -0,0 +1,9 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simd
// This file has no build tag, so that go generate can run without a build tag.
//go:generate go run genslice.go -o slice_amd64.go

View file

@ -163,3 +163,66 @@ func TestSub(t *testing.T) {
}
}
}
// checkInt8Slices reports a test error for every index of b at which a and b
// differ; elements of a past len(b) are not examined. Calling it also serves
// to use the slices, to prevent accidental optimization.
//
// A slice a that is shorter than b is reported as a test error rather than
// left to an index-out-of-range panic: callers that recover from an expected
// panic would otherwise mistake that panic for the one they are testing for.
func checkInt8Slices(t *testing.T, a, b []int8) {
	t.Helper() // attribute failures to the calling test's line
	if len(a) < len(b) {
		t.Errorf("a is shorter than b, len(a)=%d, len(b)=%d", len(a), len(b))
		return
	}
	for i := range b {
		if a[i] != b[i] {
			t.Errorf("a and b differ at index %d, a=%d, b=%d", i, a[i], b[i])
		}
	}
}
// TestSlicesInt8 round-trips the values 1..32 through an Int8x32: they are
// loaded from one 32-element slice, stored into a second one, and the two
// slices are then compared.
func TestSlicesInt8(t *testing.T) {
	src := make([]int8, 32)
	for i := range src {
		src[i] = int8(i + 1)
	}
	dst := make([]int8, len(src))

	simd.LoadInt8x32Slice(src).StoreSlice(dst)

	checkInt8Slices(t, src, dst)
}
// TestSlicesInt8TooShortLoad checks that loading an Int8x32 from a slice of
// only 31 elements panics. The deferred function turns a missing panic into
// a test failure.
func TestSlicesInt8TooShortLoad(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Logf("Saw EXPECTED panic %v", r)
		} else {
			t.Errorf("Did not see expected panic")
		}
	}()
	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic
	v := simd.LoadInt8x32Slice(a)

	// Only reached if the load failed to panic. The result is still stored
	// and compared so that it is used, but nothing below may panic itself:
	// the deferred recover would log such a panic as the expected one and
	// the test would pass. b is therefore compared only over the len(a)
	// elements that a actually has.
	b := make([]int8, 32)
	v.StoreSlice(b)
	checkInt8Slices(t, a, b[:len(a)])
}
// TestSlicesInt8TooShortStore checks that storing an Int8x32 into a slice of
// only 31 elements panics. The load from a full 32-element slice is expected
// to succeed; the deferred function turns a missing panic into a test
// failure.
func TestSlicesInt8TooShortStore(t *testing.T) {
	defer func() {
		r := recover()
		if r == nil {
			t.Errorf("Did not see expected panic")
			return
		}
		t.Logf("Saw EXPECTED panic %v", r)
	}()

	src := make([]int8, 32)
	for i := range src {
		src[i] = int8(i + 1)
	}
	vec := simd.LoadInt8x32Slice(src)

	dst := make([]int8, 31) // TOO SHORT, should panic
	vec.StoreSlice(dst)
	checkInt8Slices(t, src, dst)
}
// TestSlicesFloat64 loads a Float64x4 from a slice holding more than four
// elements, stores it into a four-element slice, and checks that the stored
// values equal the first four source values.
func TestSlicesFloat64(t *testing.T) {
	a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine
	b := make([]float64, 4)

	simd.LoadFloat64x4Slice(a).StoreSlice(b)

	for i, got := range b {
		if want := a[i]; want != got {
			t.Errorf("a and b differ at index %d, a=%f, b=%f", i, want, got)
		}
	}
}

308
src/simd/slice_amd64.go Normal file
View file

@ -0,0 +1,308 @@
// Code generated by 'go run genslice.go -o slice_amd64.go'; DO NOT EDIT.
//go:build goexperiment.simd
// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain
// see also go/doc/comment, where "simd" is inserted to the package list of the experiment is not on.
package simd
// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s
func LoadInt8x16Slice(s []int8) Int8x16 {
return LoadInt8x16((*[16]int8)(s))
}
// StoreSlice stores x into a slice of at least 16 int8s
func (x Int8x16) StoreSlice(s []int8) {
x.Store((*[16]int8)(s))
}
// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s
func LoadUint8x16Slice(s []uint8) Uint8x16 {
return LoadUint8x16((*[16]uint8)(s))
}
// StoreSlice stores x into a slice of at least 16 uint8s
func (x Uint8x16) StoreSlice(s []uint8) {
x.Store((*[16]uint8)(s))
}
// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s
func LoadInt16x8Slice(s []int16) Int16x8 {
return LoadInt16x8((*[8]int16)(s))
}
// StoreSlice stores x into a slice of at least 8 int16s
func (x Int16x8) StoreSlice(s []int16) {
x.Store((*[8]int16)(s))
}
// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s
func LoadUint16x8Slice(s []uint16) Uint16x8 {
return LoadUint16x8((*[8]uint16)(s))
}
// StoreSlice stores x into a slice of at least 8 uint16s
func (x Uint16x8) StoreSlice(s []uint16) {
x.Store((*[8]uint16)(s))
}
// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s
func LoadInt32x4Slice(s []int32) Int32x4 {
return LoadInt32x4((*[4]int32)(s))
}
// StoreSlice stores x into a slice of at least 4 int32s
func (x Int32x4) StoreSlice(s []int32) {
x.Store((*[4]int32)(s))
}
// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s
func LoadUint32x4Slice(s []uint32) Uint32x4 {
return LoadUint32x4((*[4]uint32)(s))
}
// StoreSlice stores x into a slice of at least 4 uint32s
func (x Uint32x4) StoreSlice(s []uint32) {
x.Store((*[4]uint32)(s))
}
// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s
func LoadInt64x2Slice(s []int64) Int64x2 {
return LoadInt64x2((*[2]int64)(s))
}
// StoreSlice stores x into a slice of at least 2 int64s
func (x Int64x2) StoreSlice(s []int64) {
x.Store((*[2]int64)(s))
}
// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s
func LoadUint64x2Slice(s []uint64) Uint64x2 {
return LoadUint64x2((*[2]uint64)(s))
}
// StoreSlice stores x into a slice of at least 2 uint64s
func (x Uint64x2) StoreSlice(s []uint64) {
x.Store((*[2]uint64)(s))
}
// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s
func LoadFloat32x4Slice(s []float32) Float32x4 {
return LoadFloat32x4((*[4]float32)(s))
}
// StoreSlice stores x into a slice of at least 4 float32s
func (x Float32x4) StoreSlice(s []float32) {
x.Store((*[4]float32)(s))
}
// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s
func LoadFloat64x2Slice(s []float64) Float64x2 {
return LoadFloat64x2((*[2]float64)(s))
}
// StoreSlice stores x into a slice of at least 2 float64s
func (x Float64x2) StoreSlice(s []float64) {
x.Store((*[2]float64)(s))
}
// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s
func LoadInt8x32Slice(s []int8) Int8x32 {
return LoadInt8x32((*[32]int8)(s))
}
// StoreSlice stores x into a slice of at least 32 int8s
func (x Int8x32) StoreSlice(s []int8) {
x.Store((*[32]int8)(s))
}
// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s
func LoadUint8x32Slice(s []uint8) Uint8x32 {
return LoadUint8x32((*[32]uint8)(s))
}
// StoreSlice stores x into a slice of at least 32 uint8s
func (x Uint8x32) StoreSlice(s []uint8) {
x.Store((*[32]uint8)(s))
}
// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s
func LoadInt16x16Slice(s []int16) Int16x16 {
return LoadInt16x16((*[16]int16)(s))
}
// StoreSlice stores x into a slice of at least 16 int16s
func (x Int16x16) StoreSlice(s []int16) {
x.Store((*[16]int16)(s))
}
// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s
func LoadUint16x16Slice(s []uint16) Uint16x16 {
return LoadUint16x16((*[16]uint16)(s))
}
// StoreSlice stores x into a slice of at least 16 uint16s
func (x Uint16x16) StoreSlice(s []uint16) {
x.Store((*[16]uint16)(s))
}
// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s
func LoadInt32x8Slice(s []int32) Int32x8 {
return LoadInt32x8((*[8]int32)(s))
}
// StoreSlice stores x into a slice of at least 8 int32s
func (x Int32x8) StoreSlice(s []int32) {
x.Store((*[8]int32)(s))
}
// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s
func LoadUint32x8Slice(s []uint32) Uint32x8 {
return LoadUint32x8((*[8]uint32)(s))
}
// StoreSlice stores x into a slice of at least 8 uint32s
func (x Uint32x8) StoreSlice(s []uint32) {
x.Store((*[8]uint32)(s))
}
// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s
func LoadInt64x4Slice(s []int64) Int64x4 {
return LoadInt64x4((*[4]int64)(s))
}
// StoreSlice stores x into a slice of at least 4 int64s
func (x Int64x4) StoreSlice(s []int64) {
x.Store((*[4]int64)(s))
}
// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s
func LoadUint64x4Slice(s []uint64) Uint64x4 {
return LoadUint64x4((*[4]uint64)(s))
}
// StoreSlice stores x into a slice of at least 4 uint64s
func (x Uint64x4) StoreSlice(s []uint64) {
x.Store((*[4]uint64)(s))
}
// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s
func LoadFloat32x8Slice(s []float32) Float32x8 {
return LoadFloat32x8((*[8]float32)(s))
}
// StoreSlice stores x into a slice of at least 8 float32s
func (x Float32x8) StoreSlice(s []float32) {
x.Store((*[8]float32)(s))
}
// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s
func LoadFloat64x4Slice(s []float64) Float64x4 {
return LoadFloat64x4((*[4]float64)(s))
}
// StoreSlice stores x into a slice of at least 4 float64s
func (x Float64x4) StoreSlice(s []float64) {
x.Store((*[4]float64)(s))
}
// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s
func LoadInt8x64Slice(s []int8) Int8x64 {
return LoadInt8x64((*[64]int8)(s))
}
// StoreSlice stores x into a slice of at least 64 int8s
func (x Int8x64) StoreSlice(s []int8) {
x.Store((*[64]int8)(s))
}
// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s
func LoadUint8x64Slice(s []uint8) Uint8x64 {
return LoadUint8x64((*[64]uint8)(s))
}
// StoreSlice stores x into a slice of at least 64 uint8s
func (x Uint8x64) StoreSlice(s []uint8) {
x.Store((*[64]uint8)(s))
}
// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s
func LoadInt16x32Slice(s []int16) Int16x32 {
return LoadInt16x32((*[32]int16)(s))
}
// StoreSlice stores x into a slice of at least 32 int16s
func (x Int16x32) StoreSlice(s []int16) {
x.Store((*[32]int16)(s))
}
// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s
func LoadUint16x32Slice(s []uint16) Uint16x32 {
return LoadUint16x32((*[32]uint16)(s))
}
// StoreSlice stores x into a slice of at least 32 uint16s
func (x Uint16x32) StoreSlice(s []uint16) {
x.Store((*[32]uint16)(s))
}
// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s
func LoadInt32x16Slice(s []int32) Int32x16 {
return LoadInt32x16((*[16]int32)(s))
}
// StoreSlice stores x into a slice of at least 16 int32s
func (x Int32x16) StoreSlice(s []int32) {
x.Store((*[16]int32)(s))
}
// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s
func LoadUint32x16Slice(s []uint32) Uint32x16 {
return LoadUint32x16((*[16]uint32)(s))
}
// StoreSlice stores x into a slice of at least 16 uint32s
func (x Uint32x16) StoreSlice(s []uint32) {
x.Store((*[16]uint32)(s))
}
// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s
func LoadInt64x8Slice(s []int64) Int64x8 {
return LoadInt64x8((*[8]int64)(s))
}
// StoreSlice stores x into a slice of at least 8 int64s
func (x Int64x8) StoreSlice(s []int64) {
x.Store((*[8]int64)(s))
}
// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s
func LoadUint64x8Slice(s []uint64) Uint64x8 {
return LoadUint64x8((*[8]uint64)(s))
}
// StoreSlice stores x into a slice of at least 8 uint64s
func (x Uint64x8) StoreSlice(s []uint64) {
x.Store((*[8]uint64)(s))
}
// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s
func LoadFloat32x16Slice(s []float32) Float32x16 {
return LoadFloat32x16((*[16]float32)(s))
}
// StoreSlice stores x into a slice of at least 16 float32s
func (x Float32x16) StoreSlice(s []float32) {
x.Store((*[16]float32)(s))
}
// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s
func LoadFloat64x8Slice(s []float64) Float64x8 {
return LoadFloat64x8((*[8]float64)(s))
}
// StoreSlice stores x into a slice of at least 8 float64s
func (x Float64x8) StoreSlice(s []float64) {
x.Store((*[8]float64)(s))
}