mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd: move lots of slice functions and methods to generated code
Lots of handwritten/stenciled code is now untouched by human hands. For certain combinations of operation-arity and type, there is an option to use a flaky version of a test helper that only requires "close enough". For example: testFloat32x4TernaryFlaky(t, simd.Float32x4.FusedMultiplyAdd, fmaSlice[float32], 0.001). Some of the quirkier operations have their behavior captured in their test-simulation; for example, ceilResidue regards infinities as integers (therefore their residue is zero). Change-Id: I8242914e5ab399edbe226da8586988441cffa83f Reviewed-on: https://go-review.googlesource.com/c/go/+/690575 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
3f92aa1eca
commit
d375b95357
14 changed files with 1624 additions and 676 deletions
|
|
@ -10,6 +10,7 @@ package main
|
|||
// slice operations and tests
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
|
|
@ -44,6 +45,37 @@ var allShapes = &shapes{
|
|||
// these are the shapes that are currently converted to int32
|
||||
// (not all conversions are available, yet)
|
||||
var convert32Shapes = &shapes{
|
||||
|
||||
vecs: []int{128, 256, 512},
|
||||
floats: []int{32},
|
||||
}
|
||||
|
||||
var avx512MaskedLoadShapes = &shapes{
|
||||
vecs: []int{512},
|
||||
ints: []int{8, 16, 32, 64},
|
||||
uints: []int{8, 16, 32, 64},
|
||||
floats: []int{32, 64},
|
||||
}
|
||||
|
||||
var avx2MaskedLoadShapes = &shapes{
|
||||
vecs: []int{128, 256},
|
||||
ints: []int{32, 64},
|
||||
uints: []int{32, 64},
|
||||
floats: []int{32, 64},
|
||||
}
|
||||
|
||||
var avx2SmallLoadPunShapes = &shapes{
|
||||
// ints are done by hand, these are type-punned to int.
|
||||
vecs: []int{128, 256},
|
||||
uints: []int{8, 16},
|
||||
}
|
||||
|
||||
var unaryFlaky = &shapes{
|
||||
vecs: []int{128, 256, 512},
|
||||
floats: []int{32, 64},
|
||||
}
|
||||
|
||||
var ternaryFlaky = &shapes{
|
||||
vecs: []int{128, 256, 512},
|
||||
floats: []int{32},
|
||||
}
|
||||
|
|
@ -61,6 +93,7 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io
|
|||
if strings.Contains("aeiou", baseType[:1]) {
|
||||
aOrAn = "an"
|
||||
}
|
||||
oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1))
|
||||
t.Execute(out, struct {
|
||||
Vec string // the type of the vector, e.g. Float32x4
|
||||
AOrAn string // for documentation, the article "a" or "an"
|
||||
|
|
@ -68,6 +101,7 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io
|
|||
Count int // the number of elements, e.g. 4
|
||||
WxC string // the width-by-type string, e.g., "32x4"
|
||||
Type string // the element type, e.g. "float32"
|
||||
OxFF string // a mask for the lowest 'count' bits
|
||||
}{
|
||||
Vec: vType,
|
||||
AOrAn: aOrAn,
|
||||
|
|
@ -75,6 +109,7 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io
|
|||
Count: count,
|
||||
WxC: wxc,
|
||||
Type: eType,
|
||||
OxFF: oxFF,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -110,6 +145,20 @@ func prologue(s string, out io.Writer) {
|
|||
|
||||
package simd
|
||||
|
||||
import "unsafe"
|
||||
|
||||
`, s)
|
||||
}
|
||||
|
||||
func unsafePrologue(s string, out io.Writer) {
|
||||
fmt.Fprintf(out,
|
||||
`// Code generated by '%s'; DO NOT EDIT.
|
||||
|
||||
//go:build goexperiment.simd
|
||||
|
||||
package simd
|
||||
|
||||
import "unsafe"
|
||||
`, s)
|
||||
}
|
||||
|
||||
|
|
@ -139,16 +188,6 @@ func curryTestPrologue(t string) func(s string, out io.Writer) {
|
|||
}
|
||||
}
|
||||
|
||||
// //go:noescape
|
||||
// func LoadUint8x16Slice(s []uint8) Uint8x16 {
|
||||
// return LoadUint8x16((*[16]uint8)(s[:16]))
|
||||
// }
|
||||
|
||||
// //go:noescape
|
||||
// func (x Uint8x16) StoreSlice(s []uint8) {
|
||||
// x.Store((*[16]uint8)(s[:16]))
|
||||
// }
|
||||
|
||||
func templateOf(name, temp string) shapeAndTemplate {
|
||||
return shapeAndTemplate{s: allShapes,
|
||||
t: template.Must(template.New(name).Parse(temp))}
|
||||
|
|
@ -182,7 +221,24 @@ func test{{.Vec}}Unary(t *testing.T, f func(_ simd.{{.Vec}}) simd.{{.Vec}}, want
|
|||
g := make([]{{.Type}}, n)
|
||||
f(a).StoreSlice(g)
|
||||
w := want(x)
|
||||
return checkSlicesLogInput(t, g, w, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
||||
var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", `
|
||||
// test{{.Vec}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
|
||||
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
|
||||
func test{{.Vec}}UnaryFlaky(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Vec}}, want func(x []{{.Type}}) []{{.Type}}, flakiness float64) {
|
||||
n := {{.Count}}
|
||||
t.Helper()
|
||||
forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
|
||||
t.Helper()
|
||||
a := simd.Load{{.Vec}}Slice(x)
|
||||
g := make([]{{.Type}}, n)
|
||||
f(a).StoreSlice(g)
|
||||
w := want(x)
|
||||
return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
|
@ -198,7 +254,7 @@ func test{{.Vec}}UnaryToInt32(t *testing.T, f func(x simd.{{.Vec}}) simd.Int32x{
|
|||
g := make([]int32, n)
|
||||
f(a).StoreSlice(g)
|
||||
w := want(x)
|
||||
return checkSlicesLogInput(t, g, w, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
|
@ -214,7 +270,7 @@ func test{{.Vec}}UnaryToUint32(t *testing.T, f func(x simd.{{.Vec}}) simd.Uint32
|
|||
g := make([]uint32, n)
|
||||
f(a).StoreSlice(g)
|
||||
w := want(x)
|
||||
return checkSlicesLogInput(t, g, w, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
|
@ -231,7 +287,7 @@ func test{{.Vec}}Binary(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.{{.Vec}},
|
|||
g := make([]{{.Type}}, n)
|
||||
f(a, b).StoreSlice(g)
|
||||
w := want(x, y)
|
||||
return checkSlicesLogInput(t, g, w, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
|
||||
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
|
@ -249,7 +305,26 @@ func test{{.Vec}}Ternary(t *testing.T, f func(_, _, _ simd.{{.Vec}}) simd.{{.Vec
|
|||
g := make([]{{.Type}}, n)
|
||||
f(a, b, c).StoreSlice(g)
|
||||
w := want(x, y, z)
|
||||
return checkSlicesLogInput(t, g, w, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
|
||||
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
||||
// ternaryFlakyTemplate generates the test{{.Vec}}TernaryFlaky helpers, one per
// vector type described by the ternaryFlaky shapes. Each generated helper
// loads three input slices into vectors, applies f, stores the result, and
// compares it against want via checkSlicesLogInput, forwarding the
// caller-supplied flakiness argument instead of the 0.0 that the exact
// ternary helper passes.
//
// The template name is distinct from the exact ternary helper template so
// that template parse/execute errors identify this template unambiguously
// (mirroring "unary_flaky_helpers" for the unary case).
var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_flaky_helpers", `
// test{{.Vec}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.Vec}}) simd.{{.Vec}}, want func(x, y, z []{{.Type}}) []{{.Type}}, flakiness float64) {
	n := {{.Count}}
	t.Helper()
	forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		c := simd.Load{{.Vec}}Slice(z)
		g := make([]{{.Type}}, n)
		f(a, b, c).StoreSlice(g)
		w := want(x, y, z)
		return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
	})
}
`)
|
||||
|
|
@ -266,7 +341,7 @@ func test{{.Vec}}Compare(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.Mask{{.Wx
|
|||
g := make([]int{{.Width}}, n)
|
||||
f(a, b).AsInt{{.WxC}}().StoreSlice(g)
|
||||
w := want(x, y)
|
||||
return checkSlicesLogInput(t, s64(g), w, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
|
||||
return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
|
@ -293,13 +368,117 @@ func test{{.Vec}}CompareMasked(t *testing.T,
|
|||
w[i] = 0
|
||||
}
|
||||
}
|
||||
return checkSlicesLogInput(t, s64(g), w, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("m=%v", m); })
|
||||
return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("m=%v", m); })
|
||||
})
|
||||
}
|
||||
`)
|
||||
|
||||
var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512MaskedLoadShapes, "avx 512 load slice part", `
|
||||
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
|
||||
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
|
||||
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
|
||||
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
|
||||
l := len(s)
|
||||
if l >= {{.Count}} {
|
||||
return Load{{.Vec}}Slice(s)
|
||||
}
|
||||
if l == 0 {
|
||||
var x {{.Vec}}
|
||||
return x
|
||||
}
|
||||
|
||||
mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
|
||||
return LoadMasked{{.Vec}}(pa{{.Vec}}(s), mask)
|
||||
}
|
||||
|
||||
// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
|
||||
// It stores as many elements as will fit in s.
|
||||
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
|
||||
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
|
||||
l := len(s)
|
||||
if l >= {{.Count}} {
|
||||
x.StoreSlice(s)
|
||||
return
|
||||
}
|
||||
if l == 0 {
|
||||
return
|
||||
}
|
||||
mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
|
||||
x.StoreMasked(pa{{.Vec}}(s), mask)
|
||||
}
|
||||
`)
|
||||
|
||||
var avx2MaskedLoadSlicePartTemplate = shapedTemplateOf(avx2MaskedLoadShapes, "avx 2 load slice part", `
|
||||
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
|
||||
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
|
||||
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
|
||||
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
|
||||
l := len(s)
|
||||
if l >= {{.Count}} {
|
||||
return Load{{.Vec}}Slice(s)
|
||||
}
|
||||
if l == 0 {
|
||||
var x {{.Vec}}
|
||||
return x
|
||||
}
|
||||
mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:]
|
||||
return LoadMasked{{.Vec}}(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).AsMask{{.WxC}}())
|
||||
}
|
||||
|
||||
// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
|
||||
// It stores as many elements as will fit in s.
|
||||
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
|
||||
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
|
||||
l := len(s)
|
||||
if l >= {{.Count}} {
|
||||
x.StoreSlice(s)
|
||||
return
|
||||
}
|
||||
if l == 0 {
|
||||
return
|
||||
}
|
||||
mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:]
|
||||
x.StoreMasked(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).AsMask{{.WxC}}())
|
||||
}
|
||||
`)
|
||||
|
||||
var avx2SmallLoadSlicePartTemplate = shapedTemplateOf(avx2SmallLoadPunShapes, "avx 2 small load slice part", `
|
||||
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
|
||||
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
|
||||
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
|
||||
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
|
||||
if len(s) == 0 {
|
||||
var zero {{.Vec}}
|
||||
return zero
|
||||
}
|
||||
t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s))
|
||||
return LoadInt{{.WxC}}SlicePart(t).As{{.Vec}}()
|
||||
}
|
||||
|
||||
// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
|
||||
// It stores as many elements as will fit in s.
|
||||
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
|
||||
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
|
||||
if len(s) == 0 {
|
||||
return
|
||||
}
|
||||
t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s))
|
||||
x.AsInt{{.WxC}}().StoreSlicePart(t)
|
||||
}
|
||||
`)
|
||||
|
||||
var unsafePATemplate = templateOf("unsafe PA helper", `
|
||||
// pa{{.Vec}} returns a type-unsafe pointer to array that can
|
||||
// only be used with partial load/store operations that only
|
||||
// access the known-safe portions of the array.
|
||||
func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} {
|
||||
return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0]))
|
||||
}
|
||||
`)
|
||||
|
||||
func main() {
|
||||
sl := flag.String("sl", "slice_amd64.go", "file name for slice operations")
|
||||
ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers")
|
||||
bh := flag.String("bh", "binary_helpers_test.go", "file name for binary test helpers")
|
||||
uh := flag.String("uh", "unary_helpers_test.go", "file name for unary test helpers")
|
||||
th := flag.String("th", "ternary_helpers_test.go", "file name for ternary test helpers")
|
||||
|
|
@ -308,16 +487,19 @@ func main() {
|
|||
flag.Parse()
|
||||
|
||||
if *sl != "" {
|
||||
one(*sl, prologue, sliceTemplate)
|
||||
one(*sl, prologue, sliceTemplate, avx512MaskedLoadSlicePartTemplate, avx2MaskedLoadSlicePartTemplate, avx2SmallLoadSlicePartTemplate)
|
||||
}
|
||||
if *ush != "" {
|
||||
one(*ush, unsafePrologue, unsafePATemplate)
|
||||
}
|
||||
if *uh != "" {
|
||||
one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, unaryTemplateToInt32, unaryTemplateToUint32)
|
||||
one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, unaryTemplateToInt32, unaryTemplateToUint32, unaryFlakyTemplate)
|
||||
}
|
||||
if *bh != "" {
|
||||
one(*bh, curryTestPrologue("binary simd methods"), binaryTemplate)
|
||||
}
|
||||
if *th != "" {
|
||||
one(*th, curryTestPrologue("ternary simd methods"), ternaryTemplate)
|
||||
one(*th, curryTestPrologue("ternary simd methods"), ternaryTemplate, ternaryFlakyTemplate)
|
||||
}
|
||||
if *ch != "" {
|
||||
one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate)
|
||||
|
|
@ -327,6 +509,18 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
// numberLines returns the contents of data with every line prefixed by its
// 1-based line number and ": ", each terminated by a single '\n'.
//
// Lines are delimited as bufio.ScanLines does: a trailing "\r" is dropped
// from each line and a final newline does not produce an extra empty line.
// Empty or nil input yields the empty string.
//
// It is used to dump generated code that failed to format, so that the line
// numbers in the formatter's error message can be matched to the source.
func numberLines(data []byte) string {
	var buf bytes.Buffer
	s := bufio.NewScanner(bytes.NewReader(data))
	// The Scanner's default token limit is 64 KiB; a longer line would make
	// Scan stop early and silently drop that line and everything after it.
	// No line can be longer than the input itself, so allow tokens up to
	// len(data) (the +1 leaves room to observe EOF before the buffer is full).
	s.Buffer(nil, len(data)+1)
	for i := 1; s.Scan(); i++ {
		fmt.Fprintf(&buf, "%d: %s\n", i, s.Text())
	}
	return buf.String()
}
|
||||
|
||||
func one(filename string, prologue func(s string, out io.Writer), sats ...shapeAndTemplate) {
|
||||
if filename == "" {
|
||||
return
|
||||
|
|
@ -352,7 +546,9 @@ func one(filename string, prologue func(s string, out io.Writer), sats ...shapeA
|
|||
|
||||
b, err := format.Source(out.Bytes())
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v", filename, err)
|
||||
fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v\n", filename, err)
|
||||
fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes()))
|
||||
fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v\n", filename, err)
|
||||
os.Exit(1)
|
||||
} else {
|
||||
ofile.Write(b)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue