mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
and adds some tests of size-changing conversions. IMO the template naming conventions in genfiles are getting grubby, and I plan to change them in an immediately following CL. Change-Id: I4a72e8a8c9e9806fab60570dff4c87a754e427c5 Reviewed-on: https://go-review.googlesource.com/c/go/+/697456 Commit-Queue: David Chase <drchase@google.com> Reviewed-by: Junyang Shao <shaojunyang@google.com> TryBot-Bypass: David Chase <drchase@google.com>
850 lines
25 KiB
Go
850 lines
25 KiB
Go
// Copyright 2025 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
//go:build ignore
|
|
|
|
package main
|
|
|
|
// this generates type-instantiated boilerplate code for
|
|
// slice operations and tests
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"go/format"
|
|
"io"
|
|
"os"
|
|
"strings"
|
|
"text/template"
|
|
)
|
|
|
|
type (
	// resultTypeFunc maps an input element kind t ("int", "uint" or "float"),
	// element bit width w and element count c to the kind, width and count
	// of the corresponding output vector.
	resultTypeFunc func(t string, w, c int) (ot string, ow int, oc int)

	// shapes is a set of vector bit widths together with the element
	// widths, per element kind, that a template is expanded over.
	shapes struct {
		vecs   []int // vector bit widths, e.g. 128, 256, 512
		ints   []int // signed integer element bit widths
		uints  []int // unsigned integer element bit widths
		floats []int // floating-point element bit widths
		// output, when non-nil, computes the result shape for each input
		// shape; when nil the result shape equals the input shape.
		output resultTypeFunc
	}

	// shapeAndTemplate pairs a template with the shapes it is expanded on.
	shapeAndTemplate struct {
		s *shapes
		t *template.Template
	}
)
|
|
|
|
func (sat shapeAndTemplate) target(outType string, width int) shapeAndTemplate {
|
|
newSat := sat
|
|
newShape := *sat.s
|
|
newShape.output = func(t string, w, c int) (ot string, ow int, oc int) {
|
|
return outType, width, c
|
|
}
|
|
newSat.s = &newShape
|
|
return newSat
|
|
}
|
|
|
|
func (sat shapeAndTemplate) shrinkTo(outType string, by int) shapeAndTemplate {
|
|
newSat := sat
|
|
newShape := *sat.s
|
|
newShape.output = func(t string, w, c int) (ot string, ow int, oc int) {
|
|
return outType, w / by, c * by
|
|
}
|
|
newSat.s = &newShape
|
|
return newSat
|
|
}
|
|
|
|
var allShapes = &shapes{
|
|
vecs: []int{128, 256, 512},
|
|
ints: []int{8, 16, 32, 64},
|
|
uints: []int{8, 16, 32, 64},
|
|
floats: []int{32, 64},
|
|
}
|
|
|
|
var avx512Shapes = &shapes{
|
|
vecs: []int{512},
|
|
ints: []int{8, 16, 32, 64},
|
|
uints: []int{8, 16, 32, 64},
|
|
floats: []int{32, 64},
|
|
}
|
|
|
|
var avx2Shapes = &shapes{
|
|
vecs: []int{128, 256},
|
|
ints: []int{8, 16, 32, 64},
|
|
uints: []int{8, 16, 32, 64},
|
|
floats: []int{32, 64},
|
|
}
|
|
|
|
var avx2MaskedLoadShapes = &shapes{
|
|
vecs: []int{128, 256},
|
|
ints: []int{32, 64},
|
|
uints: []int{32, 64},
|
|
floats: []int{32, 64},
|
|
}
|
|
|
|
var avx2SmallLoadPunShapes = &shapes{
|
|
// ints are done by hand, these are type-punned to int.
|
|
vecs: []int{128, 256},
|
|
uints: []int{8, 16},
|
|
}
|
|
|
|
var unaryFlaky = &shapes{ // for tests that support flaky equality
|
|
vecs: []int{128, 256, 512},
|
|
floats: []int{32, 64},
|
|
}
|
|
|
|
var ternaryFlaky = &shapes{ // for tests that support flaky equality
|
|
vecs: []int{128, 256, 512},
|
|
floats: []int{32},
|
|
}
|
|
|
|
var avx2SignedComparisons = &shapes{
|
|
vecs: []int{128, 256},
|
|
ints: []int{8, 16, 32, 64},
|
|
}
|
|
|
|
var avx2UnsignedComparisons = &shapes{
|
|
vecs: []int{128, 256},
|
|
uints: []int{8, 16, 32, 64},
|
|
}
|
|
|
|
// templateData is the value every template is executed with. It describes
// one input vector shape and the matching output vector shape.
type templateData struct {
	Vec    string // input vector type name, e.g. "Float32x4"
	AOrAn  string // article for documentation: "a" or "an"
	Width  int    // element bit width, e.g. 32
	Vwidth int    // vector bit width, e.g. 128
	Count  int    // number of elements, e.g. 4
	WxC    string // element width by element count, e.g. "32x4"
	BxC    string // the same vector viewed as bytes, e.g. "8x16"
	Base   string // capitalized element kind, e.g. "Float"
	Type   string // element type, e.g. "float32"
	OxFF   string // hex literal with the lowest Count bits set

	Ovec   string // output vector type name, e.g. "Int32x4"
	Otype  string // output element type, e.g. "int32"
	OType  string // capitalized output element type, e.g. "Int32"
	Ocount int    // number of elements in the output vector
}
|
|
|
|
func (t templateData) As128BitVec() string {
|
|
return fmt.Sprintf("%s%dx%d", t.Base, t.Width, 128/t.Width)
|
|
}
|
|
|
|
func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer, rtf resultTypeFunc) {
|
|
b := width * count
|
|
if b < 128 || b > 512 {
|
|
return
|
|
}
|
|
|
|
ot, ow, oc := baseType, width, count
|
|
if rtf != nil {
|
|
ot, ow, oc = rtf(ot, ow, oc)
|
|
if ow*oc > 512 || ow*oc < 128 || ow < 8 || ow > 64 {
|
|
return
|
|
}
|
|
// TODO someday we will support conversions to 16-bit floats
|
|
if ot == "float" && ow < 32 {
|
|
return
|
|
}
|
|
}
|
|
ovType := fmt.Sprintf("%s%dx%d", strings.ToUpper(ot[:1])+ot[1:], ow, oc)
|
|
oeType := fmt.Sprintf("%s%d", ot, ow)
|
|
oEType := fmt.Sprintf("%s%d", strings.ToUpper(ot[:1])+ot[1:], ow)
|
|
|
|
wxc := fmt.Sprintf("%dx%d", width, count)
|
|
BaseType := strings.ToUpper(baseType[:1]) + baseType[1:]
|
|
vType := fmt.Sprintf("%s%s", BaseType, wxc)
|
|
eType := fmt.Sprintf("%s%d", baseType, width)
|
|
|
|
bxc := fmt.Sprintf("%dx%d", 8, count*(width/8))
|
|
aOrAn := "a"
|
|
if strings.Contains("aeiou", baseType[:1]) {
|
|
aOrAn = "an"
|
|
}
|
|
oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1))
|
|
t.Execute(out, templateData{
|
|
Vec: vType,
|
|
AOrAn: aOrAn,
|
|
Width: width,
|
|
Vwidth: b,
|
|
Count: count,
|
|
WxC: wxc,
|
|
BxC: bxc,
|
|
Base: BaseType,
|
|
Type: eType,
|
|
OxFF: oxFF,
|
|
Ovec: ovType,
|
|
Otype: oeType,
|
|
Ocount: oc,
|
|
OType: oEType,
|
|
})
|
|
}
|
|
|
|
// forTemplates expands the template sat.t for each shape
|
|
// in sat.s, writing to out.
|
|
func (sat shapeAndTemplate) forTemplates(out io.Writer) {
|
|
t, s := sat.t, sat.s
|
|
vecs := s.vecs
|
|
ints := s.ints
|
|
uints := s.uints
|
|
floats := s.floats
|
|
for _, v := range vecs {
|
|
for _, w := range ints {
|
|
c := v / w
|
|
oneTemplate(t, "int", w, c, out, sat.s.output)
|
|
}
|
|
for _, w := range uints {
|
|
c := v / w
|
|
oneTemplate(t, "uint", w, c, out, sat.s.output)
|
|
}
|
|
for _, w := range floats {
|
|
c := v / w
|
|
oneTemplate(t, "float", w, c, out, sat.s.output)
|
|
}
|
|
}
|
|
}
|
|
|
|
// prologue writes to out the header of a generated file in package simd:
// the "Code generated" line naming the generator command s, the
// goexperiment.simd build constraint, and the package clause.
func prologue(s string, out io.Writer) {
	fmt.Fprintf(out,
		`// Code generated by '%s'; DO NOT EDIT.

//go:build goexperiment.simd

package simd

`, s)
}
|
|
|
|
// unsafePrologue writes to out the same header as prologue, followed by an
// import of package unsafe, for generated files that need it. s names the
// generator command.
func unsafePrologue(s string, out io.Writer) {
	fmt.Fprintf(out,
		`// Code generated by '%s'; DO NOT EDIT.

//go:build goexperiment.simd

package simd

import "unsafe"
`, s)
}
|
|
|
|
// testPrologue writes to out the header of a generated test-helper file in
// package simd_test: the "Code generated" line naming the generator command
// s, the goexperiment.simd build constraint, a comment saying that the file
// tests t, the package clause, and imports of simd and testing.
func testPrologue(t, s string, out io.Writer) {
	fmt.Fprintf(out,
		`// Code generated by '%s'; DO NOT EDIT.

//go:build goexperiment.simd

// This file contains functions testing %s.
// Each function in this file is specialized for a
// particular simd type <BaseType><Width>x<Count>.

package simd_test

import (
	"simd"
	"testing"
)

`, s, t)
}
|
|
|
|
func curryTestPrologue(t string) func(s string, out io.Writer) {
|
|
return func(s string, out io.Writer) {
|
|
testPrologue(t, s, out)
|
|
}
|
|
}
|
|
|
|
func templateOf(name, temp string) shapeAndTemplate {
|
|
return shapeAndTemplate{s: allShapes,
|
|
t: template.Must(template.New(name).Parse(temp))}
|
|
}
|
|
|
|
func shapedTemplateOf(s *shapes, name, temp string) shapeAndTemplate {
|
|
return shapeAndTemplate{s: s,
|
|
t: template.Must(template.New(name).Parse(temp))}
|
|
}
|
|
|
|
// sliceTemplate generates, for every vector type, a Load<Vec>Slice function
// and a StoreSlice method that convert the slice to an array pointer and
// call the array-based load and store.
var sliceTemplate = templateOf("slice", `
// Load{{.Vec}}Slice loads {{.AOrAn}} {{.Vec}} from a slice of at least {{.Count}} {{.Type}}s
func Load{{.Vec}}Slice(s []{{.Type}}) {{.Vec}} {
	return Load{{.Vec}}((*[{{.Count}}]{{.Type}})(s))
}

// StoreSlice stores x into a slice of at least {{.Count}} {{.Type}}s
func (x {{.Vec}}) StoreSlice(s []{{.Type}}) {
	x.Store((*[{{.Count}}]{{.Type}})(s))
}
`)
|
|
|
|
// unaryTemplate generates, for every vector type, a test helper that
// compares a unary simd method against a slice-based reference function,
// requiring exact equality.
var unaryTemplate = templateOf("unary_helpers", `
// test{{.Vec}}Unary tests the simd unary method f against the expected behavior generated by want
func test{{.Vec}}Unary(t *testing.T, f func(_ simd.{{.Vec}}) simd.{{.Vec}}, want func(_ []{{.Type}}) []{{.Type}}) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]{{.Type}}, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)
|
|
|
|
// unaryFlakyTemplate generates, for the floating-point vector types in
// unaryFlaky, a unary test helper that takes a flakiness tolerance.
var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", `
// test{{.Vec}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}UnaryFlaky(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Vec}}, want func(x []{{.Type}}) []{{.Type}}, flakiness float64) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]{{.Type}}, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)
|
|
|
|
// convertTemplate generates test helpers for conversions from one vector
// type to another with the same element count. It is meant to be
// specialized with target, which supplies the output element type.
var convertTemplate = templateOf("convert_helpers", `
// test{{.Vec}}ConvertTo{{.OType}} tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
func test{{.Vec}}ConvertTo{{.OType}}(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Ovec}}, want func(x []{{.Type}}) []{{.Otype}}) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]{{.Otype}}, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)
|
|
|
|
var unaryToInt32 = convertTemplate.target("int", 32)
|
|
var unaryToUint32 = convertTemplate.target("uint", 32)
|
|
var unaryToUint16 = convertTemplate.target("uint", 16)
|
|
|
|
// binaryTemplate generates, for every vector type, a test helper that
// compares a binary simd method against a slice-based reference function,
// requiring exact equality.
var binaryTemplate = templateOf("binary_helpers", `
// test{{.Vec}}Binary tests the simd binary method f against the expected behavior generated by want
func test{{.Vec}}Binary(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.{{.Vec}}, want func(_, _ []{{.Type}}) []{{.Type}}) {
	n := {{.Count}}
	t.Helper()
	forSlicePair(t, {{.Type}}s, n, func(x, y []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		g := make([]{{.Type}}, n)
		f(a, b).StoreSlice(g)
		w := want(x, y)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
	})
}
`)
|
|
|
|
// ternaryTemplate generates, for every vector type, a test helper that
// compares a ternary simd method against a slice-based reference function,
// requiring exact equality.
var ternaryTemplate = templateOf("ternary_helpers", `
// test{{.Vec}}Ternary tests the simd ternary method f against the expected behavior generated by want
func test{{.Vec}}Ternary(t *testing.T, f func(_, _, _ simd.{{.Vec}}) simd.{{.Vec}}, want func(_, _, _ []{{.Type}}) []{{.Type}}) {
	n := {{.Count}}
	t.Helper()
	forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		c := simd.Load{{.Vec}}Slice(z)
		g := make([]{{.Type}}, n)
		f(a, b, c).StoreSlice(g)
		w := want(x, y, z)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
	})
}
`)
|
|
|
|
// ternaryFlakyTemplate generates, for the floating-point vector types in
// ternaryFlaky, a ternary test helper that takes a flakiness tolerance.
// The template has its own name, parallel to "unary_flaky_helpers", so that
// template errors do not point at ternaryTemplate.
var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_flaky_helpers", `
// test{{.Vec}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.Vec}}) simd.{{.Vec}}, want func(x, y, z []{{.Type}}) []{{.Type}}, flakiness float64) {
	n := {{.Count}}
	t.Helper()
	forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		c := simd.Load{{.Vec}}Slice(z)
		g := make([]{{.Type}}, n)
		f(a, b, c).StoreSlice(g)
		w := want(x, y, z)
		return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
	})
}
`)
|
|
|
|
// compareTemplate generates, for every vector type, a test helper for
// comparison methods. The resulting mask is converted to an integer vector
// and compared, widened to int64, against the reference result.
var compareTemplate = templateOf("compare_helpers", `
// test{{.Vec}}Compare tests the simd comparison method f against the expected behavior generated by want
func test{{.Vec}}Compare(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Type}}) []int64) {
	n := {{.Count}}
	t.Helper()
	forSlicePair(t, {{.Type}}s, n, func(x, y []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		g := make([]int{{.Width}}, n)
		f(a, b).AsInt{{.WxC}}().StoreSlice(g)
		w := want(x, y)
		return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
	})
}
`)
|
|
|
|
// TODO this has not been tested yet.
|
|
var compareMaskedTemplate = templateOf("comparemasked_helpers", `
|
|
// test{{.Vec}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
|
|
// The mask is applied to the output of want; anything not in the mask, is zeroed.
|
|
func test{{.Vec}}CompareMasked(t *testing.T,
|
|
f func(_, _ simd.{{.Vec}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}},
|
|
want func(_, _ []{{.Type}}) []int64) {
|
|
n := {{.Count}}
|
|
t.Helper()
|
|
forSlicePairMasked(t, {{.Type}}s, n, func(x, y []{{.Type}}, m []bool) bool {
|
|
t.Helper()
|
|
a := simd.Load{{.Vec}}Slice(x)
|
|
b := simd.Load{{.Vec}}Slice(y)
|
|
k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.Width}}](m)).ToMask()
|
|
g := make([]int{{.Width}}, n)
|
|
f(a, b, k).AsInt{{.WxC}}().StoreSlice(g)
|
|
w := want(x, y)
|
|
for i := range m {
|
|
if !m[i] {
|
|
w[i] = 0
|
|
}
|
|
}
|
|
return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("m=%v", m); })
|
|
})
|
|
}
|
|
`)
|
|
|
|
// avx512MaskedLoadSlicePartTemplate generates, for the 512-bit vector
// types, Load<Vec>SlicePart and StoreSlicePart. Short slices are handled
// with a masked load or store whose mask has the low len(s) bits set.
var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512Shapes, "avx 512 load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
	l := len(s)
	if l >= {{.Count}} {
		return Load{{.Vec}}Slice(s)
	}
	if l == 0 {
		var x {{.Vec}}
		return x
	}
	mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
	return LoadMasked{{.Vec}}(pa{{.Vec}}(s), mask)
}

// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
	l := len(s)
	if l >= {{.Count}} {
		x.StoreSlice(s)
		return
	}
	if l == 0 {
		return
	}
	mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
	x.StoreMasked(pa{{.Vec}}(s), mask)
}
`)
|
|
|
|
// avx2MaskedLoadSlicePartTemplate generates, for the 128- and 256-bit
// vector types with 32- and 64-bit elements, Load<Vec>SlicePart and
// StoreSlicePart. Short slices are handled with a masked load or store
// whose mask is loaded from a window of the vecMask<Width> table.
var avx2MaskedLoadSlicePartTemplate = shapedTemplateOf(avx2MaskedLoadShapes, "avx 2 load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
	l := len(s)
	if l >= {{.Count}} {
		return Load{{.Vec}}Slice(s)
	}
	if l == 0 {
		var x {{.Vec}}
		return x
	}
	mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:]
	return LoadMasked{{.Vec}}(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).asMask())
}

// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
	l := len(s)
	if l >= {{.Count}} {
		x.StoreSlice(s)
		return
	}
	if l == 0 {
		return
	}
	mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:]
	x.StoreMasked(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).asMask())
}
`)
|
|
|
|
// avx2SmallLoadSlicePartTemplate generates, for the 128- and 256-bit vector
// types with 8- and 16-bit unsigned elements, Load<Vec>SlicePart and
// StoreSlicePart. They reinterpret the slice as signed integers of the same
// width and call the signed SlicePart implementations.
var avx2SmallLoadSlicePartTemplate = shapedTemplateOf(avx2SmallLoadPunShapes, "avx 2 small load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
	if len(s) == 0 {
		var zero {{.Vec}}
		return zero
	}
	t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s))
	return LoadInt{{.WxC}}SlicePart(t).As{{.Vec}}()
}

// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
	if len(s) == 0 {
		return
	}
	t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s))
	x.AsInt{{.WxC}}().StoreSlicePart(t)
}
`)
|
|
|
|
func (t templateData) CPUfeature() string {
|
|
switch t.Vwidth {
|
|
case 128:
|
|
return "AVX"
|
|
case 256:
|
|
return "AVX2"
|
|
case 512:
|
|
return "AVX512"
|
|
}
|
|
panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
|
|
}
|
|
|
|
// avx2SignedComparisonsTemplate generates, for the 128- and 256-bit signed
// integer vector types, the Less, GreaterEqual, LessEqual and NotEqual
// methods, built from Greater and Equal and inverted by XOR with all ones.
var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
	return y.Greater(x)
}

// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
	ones := x.Equal(x).AsInt{{.WxC}}()
	return y.Greater(x).AsInt{{.WxC}}().Xor(ones).asMask()
}

// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
	ones := x.Equal(x).AsInt{{.WxC}}()
	return x.Greater(y).AsInt{{.WxC}}().Xor(ones).asMask()
}

// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
	ones := x.Equal(x).AsInt{{.WxC}}()
	return x.Equal(y).AsInt{{.WxC}}().Xor(ones).asMask()
}
`)
|
|
|
|
// CPUfeatureAVX2if8 return AVX2 if the element width is 8,
|
|
// otherwise, it returns CPUfeature. This is for the cpufeature
|
|
// of unsigned comparison emulation, which uses shifts for all
|
|
// the sizes > 8 (shifts are AVX) but must use broadcast (AVX2)
|
|
// for bytes.
|
|
func (t templateData) CPUfeatureAVX2if8() string {
|
|
if t.Width == 8 {
|
|
return "AVX2"
|
|
}
|
|
return t.CPUfeature()
|
|
}
|
|
|
|
// avx2UnsignedComparisonsTemplate generates, for the 128- and 256-bit
// unsigned integer vector types, the Greater, Less, GreaterEqual, LessEqual
// and NotEqual methods. The ordered comparisons flip the sign bit of both
// operands and use the signed Greater; the sign mask comes from a broadcast
// for 8-bit elements and from a shift of all ones otherwise.
var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
// Greater returns a mask whose elements indicate whether x > y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}}
	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
	ones := x.Equal(x).AsInt{{.WxC}}()
	signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
	return a.Xor(signs).Greater(b.Xor(signs))
}

// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}}
	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
	ones := x.Equal(x).AsInt{{.WxC}}()
	signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
	return b.Xor(signs).Greater(a.Xor(signs))
}

// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
	ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}}
	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
	signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
	return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask()
}

// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
	ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}}
	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
{{- else}}
	signs := ones.ShiftAllLeft({{.Width}}-1)
{{- end }}
	return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask()
}

// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
	ones := x.Equal(x).AsInt{{.WxC}}()
	return a.Equal(b).AsInt{{.WxC}}().Xor(ones).asMask()
}
`)
|
|
|
|
// unsafePATemplate generates, for every vector type, the pa<Vec> helper
// that reinterprets the start of a slice as a pointer to a full-length
// array. The generated helper indexes s[0], so it must not be called with
// an empty slice.
var unsafePATemplate = templateOf("unsafe PA helper", `
// pa{{.Vec}} returns a type-unsafe pointer to array that can
// only be used with partial load/store operations that only
// access the known-safe portions of the array.
func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} {
	return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0]))
}
`)
|
|
|
|
// avx2MaskedTemplate generates the Masked and Merge methods for the 128-
// and 256-bit vector types. Masked ANDs the vector with the mask viewed as
// integers; Merge blends on the byte view of the operands. The trim markers
// in the template are significant and must be kept as they are.
var avx2MaskedTemplate = shapedTemplateOf(avx2Shapes, "avx2 .Masked methods", `
// Masked returns x but with elements zeroed where mask is false.
func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} {
	im := mask.AsInt{{.WxC}}()
{{- if eq .Base "Int" }}
	return im.And(x)
{{- else}}
	return x.AsInt{{.WxC}}().And(im).As{{.Vec}}()
{{- end -}}
}

// Merge returns x but with elements set to y where mask is false.
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
{{- if eq .BxC .WxC -}}
	im := mask.AsInt{{.BxC}}()
{{- else}}
	im := mask.AsInt{{.WxC}}().AsInt{{.BxC}}()
{{- end -}}
{{- if and (eq .Base "Int") (eq .BxC .WxC) }}
	return y.blend(x, im)
{{- else}}
	ix := x.AsInt{{.BxC}}()
	iy := y.AsInt{{.BxC}}()
	return iy.blend(ix, im).As{{.Vec}}()
{{- end -}}
}
`)
|
|
|
|
// TODO perhaps write these in ways that work better on AVX512
|
|
var avx512MaskedTemplate = shapedTemplateOf(avx512Shapes, "avx512 .Masked methods", `
|
|
// Masked returns x but with elements zeroed where mask is false.
|
|
func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} {
|
|
im := mask.AsInt{{.WxC}}()
|
|
{{- if eq .Base "Int" }}
|
|
return im.And(x)
|
|
{{- else}}
|
|
return x.AsInt{{.WxC}}().And(im).As{{.Vec}}()
|
|
{{- end -}}
|
|
}
|
|
|
|
// Merge returns x but with elements set to y where m is false.
|
|
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
|
|
{{- if eq .Base "Int" }}
|
|
return y.blendMasked(x, mask)
|
|
{{- else}}
|
|
ix := x.AsInt{{.WxC}}()
|
|
iy := y.AsInt{{.WxC}}()
|
|
return iy.blendMasked(ix, mask).As{{.Vec}}()
|
|
{{- end -}}
|
|
}
|
|
`)
|
|
|
|
func (t templateData) CPUfeatureBC() string {
|
|
switch t.Vwidth {
|
|
case 128:
|
|
return "AVX2"
|
|
case 256:
|
|
return "AVX2"
|
|
case 512:
|
|
if t.Width <= 16 {
|
|
return "AVX512BW"
|
|
}
|
|
return "AVX512F"
|
|
}
|
|
panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
|
|
}
|
|
|
|
// broadcastTemplate generates, for every vector type, a Broadcast<Vec>
// function that puts the scalar in element 0 of a 128-bit vector and then
// broadcasts that element to the full vector width.
var broadcastTemplate = templateOf("Broadcast functions", `
// Broadcast{{.Vec}} returns a vector with the input
// x assigned to all elements of the output.
//
// Emulated, CPU Feature {{.CPUfeatureBC}}
func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} {
	var z {{.As128BitVec }}
	return z.SetElem(0, x).Broadcast{{.Vwidth}}()
}
`)
|
|
|
|
// maskCvtTemplate generates, for every vector type, a ToMask method that
// compares the vector against the zero vector with NotEqual.
var maskCvtTemplate = templateOf("Mask conversions", `
// ToMask converts from {{.Base}}{{.WxC}} to Mask{{.WxC}}, mask element is set to true when the corresponding vector element is non-zero.
func (from {{.Base}}{{.WxC}}) ToMask() (to Mask{{.WxC}}) {
	return from.NotEqual({{.Base}}{{.WxC}}{})
}
`)
|
|
|
|
func main() {
|
|
sl := flag.String("sl", "slice_gen_amd64.go", "file name for slice operations")
|
|
cm := flag.String("cm", "compare_gen_amd64.go", "file name for comparison operations")
|
|
mm := flag.String("mm", "maskmerge_gen_amd64.go", "file name for mask/merge operations")
|
|
op := flag.String("op", "other_gen_amd64.go", "file name for other operations")
|
|
ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers")
|
|
bh := flag.String("bh", "binary_helpers_test.go", "file name for binary test helpers")
|
|
uh := flag.String("uh", "unary_helpers_test.go", "file name for unary test helpers")
|
|
th := flag.String("th", "ternary_helpers_test.go", "file name for ternary test helpers")
|
|
ch := flag.String("ch", "compare_helpers_test.go", "file name for compare test helpers")
|
|
cmh := flag.String("cmh", "comparemasked_helpers_test.go", "file name for compare-masked test helpers")
|
|
flag.Parse()
|
|
|
|
if *sl != "" {
|
|
one(*sl, unsafePrologue,
|
|
sliceTemplate,
|
|
avx512MaskedLoadSlicePartTemplate,
|
|
avx2MaskedLoadSlicePartTemplate,
|
|
avx2SmallLoadSlicePartTemplate,
|
|
)
|
|
}
|
|
if *cm != "" {
|
|
one(*cm, prologue,
|
|
avx2SignedComparisonsTemplate,
|
|
avx2UnsignedComparisonsTemplate,
|
|
)
|
|
}
|
|
if *mm != "" {
|
|
one(*mm, prologue,
|
|
avx2MaskedTemplate,
|
|
avx512MaskedTemplate,
|
|
)
|
|
}
|
|
if *op != "" {
|
|
one(*op, prologue,
|
|
broadcastTemplate,
|
|
maskCvtTemplate,
|
|
)
|
|
}
|
|
if *ush != "" {
|
|
one(*ush, unsafePrologue, unsafePATemplate)
|
|
}
|
|
if *uh != "" {
|
|
one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, unaryToInt32, unaryToUint32, unaryToUint16, unaryFlakyTemplate)
|
|
}
|
|
if *bh != "" {
|
|
one(*bh, curryTestPrologue("binary simd methods"), binaryTemplate)
|
|
}
|
|
if *th != "" {
|
|
one(*th, curryTestPrologue("ternary simd methods"), ternaryTemplate, ternaryFlakyTemplate)
|
|
}
|
|
if *ch != "" {
|
|
one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate)
|
|
}
|
|
if *cmh != "" {
|
|
one(*cmh, curryTestPrologue("simd methods that compare two operands under a mask"), compareMaskedTemplate)
|
|
}
|
|
}
|
|
|
|
// numberLines returns the lines of data, each prefixed with its 1-based
// line number and ": " and terminated by a newline. Lines are split the
// way bufio.Scanner splits them by default.
func numberLines(data []byte) string {
	numbered := new(bytes.Buffer)
	scanner := bufio.NewScanner(bytes.NewReader(data))
	lineNo := 0
	for scanner.Scan() {
		lineNo++
		fmt.Fprintf(numbered, "%d: %s\n", lineNo, scanner.Text())
	}
	return numbered.String()
}
|
|
|
|
func one(filename string, prologue func(s string, out io.Writer), sats ...shapeAndTemplate) {
|
|
if filename == "" {
|
|
return
|
|
}
|
|
|
|
ofile := os.Stdout
|
|
|
|
if filename != "-" {
|
|
var err error
|
|
ofile, err = os.Create(filename)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Could not create the output file %s for the generated code, %v", filename, err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
out := new(bytes.Buffer)
|
|
|
|
prologue("go run genfiles.go", out)
|
|
for _, sat := range sats {
|
|
sat.forTemplates(out)
|
|
}
|
|
|
|
b, err := format.Source(out.Bytes())
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v\n", filename, err)
|
|
fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes()))
|
|
fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v\n", filename, err)
|
|
os.Exit(1)
|
|
} else {
|
|
ofile.Write(b)
|
|
ofile.Close()
|
|
}
|
|
|
|
}
|