[dev.simd] simd: template field name cleanup in genfiles

things were getting a little too ad hoc

Change-Id: I4298002ae57f5b75159703ceed30a117804eb844
Reviewed-on: https://go-review.googlesource.com/c/go/+/697495
Commit-Queue: David Chase <drchase@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
TryBot-Bypass: David Chase <drchase@google.com>
This commit is contained in:
David Chase 2025-08-19 16:17:58 -04:00
parent af6475df73
commit 1334285862

View file

@ -113,25 +113,25 @@ var avx2UnsignedComparisons = &shapes{
} }
type templateData struct { type templateData struct {
Vec string // the type of the vector, e.g. Float32x4 VType string // the type of the vector, e.g. Float32x4
AOrAn string // for documentation, the article "a" or "an" AOrAn string // for documentation, the article "a" or "an"
Width int // the bit width of the element type, e.g. 32 EWidth int // the bit width of the element type, e.g. 32
Vwidth int // the width of the vector type, e.g. 128 Vwidth int // the width of the vector type, e.g. 128
Count int // the number of elements, e.g. 4 Count int // the number of elements, e.g. 4
WxC string // the width-by-type string, e.g., "32x4" WxC string // the width-by-type string, e.g., "32x4"
BxC string // as if bytes, in the proper count, e.g., "8x16" (W==8) BxC string // as if bytes, in the proper count, e.g., "8x16" (W==8)
Base string // the capitalized Base Type of the vector, e.g., "Float" Base string // the title-case Base Type of the vector, e.g., "Float"
Type string // the element type, e.g. "float32" Etype string // the element type, e.g. "float32"
OxFF string // a mask for the lowest 'count' bits OxFF string // a mask for the lowest 'count' bits
Ovec string OVType string // type of output vector
Otype string OEtype string // output element type
OType string OEType string // output element type, title-case
Ocount int OCount int // output element count
} }
func (t templateData) As128BitVec() string { func (t templateData) As128BitVec() string {
return fmt.Sprintf("%s%dx%d", t.Base, t.Width, 128/t.Width) return fmt.Sprintf("%s%dx%d", t.Base, t.EWidth, 128/t.EWidth)
} }
func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer, rtf resultTypeFunc) { func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer, rtf resultTypeFunc) {
@ -167,20 +167,20 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io
} }
oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1)) oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1))
t.Execute(out, templateData{ t.Execute(out, templateData{
Vec: vType, VType: vType,
AOrAn: aOrAn, AOrAn: aOrAn,
Width: width, EWidth: width,
Vwidth: b, Vwidth: b,
Count: count, Count: count,
WxC: wxc, WxC: wxc,
BxC: bxc, BxC: bxc,
Base: BaseType, Base: BaseType,
Type: eType, Etype: eType,
OxFF: oxFF, OxFF: oxFF,
Ovec: ovType, OVType: ovType,
Otype: oeType, OEtype: oeType,
Ocount: oc, OCount: oc,
OType: oEType, OEType: oEType,
}) })
} }
@ -268,26 +268,26 @@ func shapedTemplateOf(s *shapes, name, temp string) shapeAndTemplate {
} }
var sliceTemplate = templateOf("slice", ` var sliceTemplate = templateOf("slice", `
// Load{{.Vec}}Slice loads {{.AOrAn}} {{.Vec}} from a slice of at least {{.Count}} {{.Type}}s // Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s
func Load{{.Vec}}Slice(s []{{.Type}}) {{.Vec}} { func Load{{.VType}}Slice(s []{{.Etype}}) {{.VType}} {
return Load{{.Vec}}((*[{{.Count}}]{{.Type}})(s)) return Load{{.VType}}((*[{{.Count}}]{{.Etype}})(s))
} }
// StoreSlice stores x into a slice of at least {{.Count}} {{.Type}}s // StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s
func (x {{.Vec}}) StoreSlice(s []{{.Type}}) { func (x {{.VType}}) StoreSlice(s []{{.Etype}}) {
x.Store((*[{{.Count}}]{{.Type}})(s)) x.Store((*[{{.Count}}]{{.Etype}})(s))
} }
`) `)
var unaryTemplate = templateOf("unary_helpers", ` var unaryTemplate = templateOf("unary_helpers", `
// test{{.Vec}}Unary tests the simd unary method f against the expected behavior generated by want // test{{.VType}}Unary tests the simd unary method f against the expected behavior generated by want
func test{{.Vec}}Unary(t *testing.T, f func(_ simd.{{.Vec}}) simd.{{.Vec}}, want func(_ []{{.Type}}) []{{.Type}}) { func test{{.VType}}Unary(t *testing.T, f func(_ simd.{{.VType}}) simd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool { forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
g := make([]{{.Type}}, n) g := make([]{{.Etype}}, n)
f(a).StoreSlice(g) f(a).StoreSlice(g)
w := want(x) w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)}) return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
@ -296,15 +296,15 @@ func test{{.Vec}}Unary(t *testing.T, f func(_ simd.{{.Vec}}) simd.{{.Vec}}, want
`) `)
var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", ` var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", `
// test{{.Vec}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // test{{.VType}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works // but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}UnaryFlaky(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Vec}}, want func(x []{{.Type}}) []{{.Type}}, flakiness float64) { func test{{.VType}}UnaryFlaky(t *testing.T, f func(x simd.{{.VType}}) simd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool { forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
g := make([]{{.Type}}, n) g := make([]{{.Etype}}, n)
f(a).StoreSlice(g) f(a).StoreSlice(g)
w := want(x) w := want(x)
return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x)}) return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x)})
@ -313,15 +313,15 @@ func test{{.Vec}}UnaryFlaky(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Vec}},
`) `)
var convertTemplate = templateOf("convert_helpers", ` var convertTemplate = templateOf("convert_helpers", `
// test{{.Vec}}ConvertTo{{.OType}} tests the simd conversion method f against the expected behavior generated by want // test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
func test{{.Vec}}ConvertTo{{.OType}}(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Ovec}}, want func(x []{{.Type}}) []{{.Otype}}) { func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x simd.{{.VType}}) simd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool { forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
g := make([]{{.Otype}}, n) g := make([]{{.OEtype}}, n)
f(a).StoreSlice(g) f(a).StoreSlice(g)
w := want(x) w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)}) return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
@ -334,15 +334,15 @@ var unaryToUint32 = convertTemplate.target("uint", 32)
var unaryToUint16 = convertTemplate.target("uint", 16) var unaryToUint16 = convertTemplate.target("uint", 16)
var binaryTemplate = templateOf("binary_helpers", ` var binaryTemplate = templateOf("binary_helpers", `
// test{{.Vec}}Binary tests the simd binary method f against the expected behavior generated by want // test{{.VType}}Binary tests the simd binary method f against the expected behavior generated by want
func test{{.Vec}}Binary(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.{{.Vec}}, want func(_, _ []{{.Type}}) []{{.Type}}) { func test{{.VType}}Binary(t *testing.T, f func(_, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSlicePair(t, {{.Type}}s, n, func(x, y []{{.Type}}) bool { forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.Vec}}Slice(y) b := simd.Load{{.VType}}Slice(y)
g := make([]{{.Type}}, n) g := make([]{{.Etype}}, n)
f(a, b).StoreSlice(g) f(a, b).StoreSlice(g)
w := want(x, y) w := want(x, y)
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); }) return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
@ -351,16 +351,16 @@ func test{{.Vec}}Binary(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.{{.Vec}},
`) `)
var ternaryTemplate = templateOf("ternary_helpers", ` var ternaryTemplate = templateOf("ternary_helpers", `
// test{{.Vec}}Ternary tests the simd ternary method f against the expected behavior generated by want // test{{.VType}}Ternary tests the simd ternary method f against the expected behavior generated by want
func test{{.Vec}}Ternary(t *testing.T, f func(_, _, _ simd.{{.Vec}}) simd.{{.Vec}}, want func(_, _, _ []{{.Type}}) []{{.Type}}) { func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool { forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.Vec}}Slice(y) b := simd.Load{{.VType}}Slice(y)
c := simd.Load{{.Vec}}Slice(z) c := simd.Load{{.VType}}Slice(z)
g := make([]{{.Type}}, n) g := make([]{{.Etype}}, n)
f(a, b, c).StoreSlice(g) f(a, b, c).StoreSlice(g)
w := want(x, y, z) w := want(x, y, z)
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); }) return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
@ -369,17 +369,17 @@ func test{{.Vec}}Ternary(t *testing.T, f func(_, _, _ simd.{{.Vec}}) simd.{{.Vec
`) `)
var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_helpers", ` var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_helpers", `
// test{{.Vec}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want, // test{{.VType}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works // but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.Vec}}) simd.{{.Vec}}, want func(x, y, z []{{.Type}}) []{{.Type}}, flakiness float64) { func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.VType}}) simd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool { forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.Vec}}Slice(y) b := simd.Load{{.VType}}Slice(y)
c := simd.Load{{.Vec}}Slice(z) c := simd.Load{{.VType}}Slice(z)
g := make([]{{.Type}}, n) g := make([]{{.Etype}}, n)
f(a, b, c).StoreSlice(g) f(a, b, c).StoreSlice(g)
w := want(x, y, z) w := want(x, y, z)
return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); }) return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
@ -388,15 +388,15 @@ func test{{.Vec}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.Vec}}) simd.{
`) `)
var compareTemplate = templateOf("compare_helpers", ` var compareTemplate = templateOf("compare_helpers", `
// test{{.Vec}}Compare tests the simd comparison method f against the expected behavior generated by want // test{{.VType}}Compare tests the simd comparison method f against the expected behavior generated by want
func test{{.Vec}}Compare(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Type}}) []int64) { func test{{.VType}}Compare(t *testing.T, f func(_, _ simd.{{.VType}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSlicePair(t, {{.Type}}s, n, func(x, y []{{.Type}}) bool { forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.Vec}}Slice(y) b := simd.Load{{.VType}}Slice(y)
g := make([]int{{.Width}}, n) g := make([]int{{.EWidth}}, n)
f(a, b).AsInt{{.WxC}}().StoreSlice(g) f(a, b).AsInt{{.WxC}}().StoreSlice(g)
w := want(x, y) w := want(x, y)
return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); }) return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
@ -406,19 +406,19 @@ func test{{.Vec}}Compare(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.Mask{{.Wx
// TODO this has not been tested yet. // TODO this has not been tested yet.
var compareMaskedTemplate = templateOf("comparemasked_helpers", ` var compareMaskedTemplate = templateOf("comparemasked_helpers", `
// test{{.Vec}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // test{{.VType}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed. // The mask is applied to the output of want; anything not in the mask, is zeroed.
func test{{.Vec}}CompareMasked(t *testing.T, func test{{.VType}}CompareMasked(t *testing.T,
f func(_, _ simd.{{.Vec}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}}, f func(_, _ simd.{{.VType}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}},
want func(_, _ []{{.Type}}) []int64) { want func(_, _ []{{.Etype}}) []int64) {
n := {{.Count}} n := {{.Count}}
t.Helper() t.Helper()
forSlicePairMasked(t, {{.Type}}s, n, func(x, y []{{.Type}}, m []bool) bool { forSlicePairMasked(t, {{.Etype}}s, n, func(x, y []{{.Etype}}, m []bool) bool {
t.Helper() t.Helper()
a := simd.Load{{.Vec}}Slice(x) a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.Vec}}Slice(y) b := simd.Load{{.VType}}Slice(y)
k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.Width}}](m)).ToMask() k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask()
g := make([]int{{.Width}}, n) g := make([]int{{.EWidth}}, n)
f(a, b, k).AsInt{{.WxC}}().StoreSlice(g) f(a, b, k).AsInt{{.WxC}}().StoreSlice(g)
w := want(x, y) w := want(x, y)
for i := range m { for i := range m {
@ -432,26 +432,26 @@ func test{{.Vec}}CompareMasked(t *testing.T,
`) `)
var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512Shapes, "avx 512 load slice part", ` var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512Shapes, "avx 512 load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s. // Load{{.VType}}SlicePart loads a {{.VType}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes. // If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice. // If s has {{.Count}} or more elements, the function is equivalent to Load{{.VType}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} { func Load{{.VType}}SlicePart(s []{{.Etype}}) {{.VType}} {
l := len(s) l := len(s)
if l >= {{.Count}} { if l >= {{.Count}} {
return Load{{.Vec}}Slice(s) return Load{{.VType}}Slice(s)
} }
if l == 0 { if l == 0 {
var x {{.Vec}} var x {{.VType}}
return x return x
} }
mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l)) mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
return LoadMasked{{.Vec}}(pa{{.Vec}}(s), mask) return LoadMasked{{.VType}}(pa{{.VType}}(s), mask)
} }
// StoreSlicePart stores the {{.Count}} elements of x into the slice s. // StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s. // It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice. // If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) { func (x {{.VType}}) StoreSlicePart(s []{{.Etype}}) {
l := len(s) l := len(s)
if l >= {{.Count}} { if l >= {{.Count}} {
x.StoreSlice(s) x.StoreSlice(s)
@ -461,31 +461,31 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
return return
} }
mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l)) mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
x.StoreMasked(pa{{.Vec}}(s), mask) x.StoreMasked(pa{{.VType}}(s), mask)
} }
`) `)
var avx2MaskedLoadSlicePartTemplate = shapedTemplateOf(avx2MaskedLoadShapes, "avx 2 load slice part", ` var avx2MaskedLoadSlicePartTemplate = shapedTemplateOf(avx2MaskedLoadShapes, "avx 2 load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s. // Load{{.VType}}SlicePart loads a {{.VType}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes. // If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice. // If s has {{.Count}} or more elements, the function is equivalent to Load{{.VType}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} { func Load{{.VType}}SlicePart(s []{{.Etype}}) {{.VType}} {
l := len(s) l := len(s)
if l >= {{.Count}} { if l >= {{.Count}} {
return Load{{.Vec}}Slice(s) return Load{{.VType}}Slice(s)
} }
if l == 0 { if l == 0 {
var x {{.Vec}} var x {{.VType}}
return x return x
} }
mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:] mask := vecMask{{.EWidth}}[len(vecMask{{.EWidth}})/2-l:]
return LoadMasked{{.Vec}}(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).asMask()) return LoadMasked{{.VType}}(pa{{.VType}}(s), LoadInt{{.WxC}}Slice(mask).asMask())
} }
// StoreSlicePart stores the {{.Count}} elements of x into the slice s. // StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s. // It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice. // If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) { func (x {{.VType}}) StoreSlicePart(s []{{.Etype}}) {
l := len(s) l := len(s)
if l >= {{.Count}} { if l >= {{.Count}} {
x.StoreSlice(s) x.StoreSlice(s)
@ -494,32 +494,32 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
if l == 0 { if l == 0 {
return return
} }
mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:] mask := vecMask{{.EWidth}}[len(vecMask{{.EWidth}})/2-l:]
x.StoreMasked(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).asMask()) x.StoreMasked(pa{{.VType}}(s), LoadInt{{.WxC}}Slice(mask).asMask())
} }
`) `)
var avx2SmallLoadSlicePartTemplate = shapedTemplateOf(avx2SmallLoadPunShapes, "avx 2 small load slice part", ` var avx2SmallLoadSlicePartTemplate = shapedTemplateOf(avx2SmallLoadPunShapes, "avx 2 small load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s. // Load{{.VType}}SlicePart loads a {{.VType}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes. // If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice. // If s has {{.Count}} or more elements, the function is equivalent to Load{{.VType}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} { func Load{{.VType}}SlicePart(s []{{.Etype}}) {{.VType}} {
if len(s) == 0 { if len(s) == 0 {
var zero {{.Vec}} var zero {{.VType}}
return zero return zero
} }
t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s)) t := unsafe.Slice((*int{{.EWidth}})(unsafe.Pointer(&s[0])), len(s))
return LoadInt{{.WxC}}SlicePart(t).As{{.Vec}}() return LoadInt{{.WxC}}SlicePart(t).As{{.VType}}()
} }
// StoreSlicePart stores the {{.Count}} elements of x into the slice s. // StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s. // It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice. // If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) { func (x {{.VType}}) StoreSlicePart(s []{{.Etype}}) {
if len(s) == 0 { if len(s) == 0 {
return return
} }
t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s)) t := unsafe.Slice((*int{{.EWidth}})(unsafe.Pointer(&s[0])), len(s))
x.AsInt{{.WxC}}().StoreSlicePart(t) x.AsInt{{.WxC}}().StoreSlicePart(t)
} }
`) `)
@ -540,14 +540,14 @@ var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx
// Less returns a mask whose elements indicate whether x < y // Less returns a mask whose elements indicate whether x < y
// //
// Emulated, CPU Feature {{.CPUfeature}} // Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} {
return y.Greater(x) return y.Greater(x)
} }
// GreaterEqual returns a mask whose elements indicate whether x >= y // GreaterEqual returns a mask whose elements indicate whether x >= y
// //
// Emulated, CPU Feature {{.CPUfeature}} // Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
return y.Greater(x).AsInt{{.WxC}}().Xor(ones).asMask() return y.Greater(x).AsInt{{.WxC}}().Xor(ones).asMask()
} }
@ -555,7 +555,7 @@ func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
// LessEqual returns a mask whose elements indicate whether x <= y // LessEqual returns a mask whose elements indicate whether x <= y
// //
// Emulated, CPU Feature {{.CPUfeature}} // Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
return x.Greater(y).AsInt{{.WxC}}().Xor(ones).asMask() return x.Greater(y).AsInt{{.WxC}}().Xor(ones).asMask()
} }
@ -563,7 +563,7 @@ func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
// NotEqual returns a mask whose elements indicate whether x != y // NotEqual returns a mask whose elements indicate whether x != y
// //
// Emulated, CPU Feature {{.CPUfeature}} // Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} {
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
return x.Equal(y).AsInt{{.WxC}}().Xor(ones).asMask() return x.Equal(y).AsInt{{.WxC}}().Xor(ones).asMask()
} }
@ -575,7 +575,7 @@ func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
// the sizes > 8 (shifts are AVX) but must use broadcast (AVX2) // the sizes > 8 (shifts are AVX) but must use broadcast (AVX2)
// for bytes. // for bytes.
func (t templateData) CPUfeatureAVX2if8() string { func (t templateData) CPUfeatureAVX2if8() string {
if t.Width == 8 { if t.EWidth == 8 {
return "AVX2" return "AVX2"
} }
return t.CPUfeature() return t.CPUfeature()
@ -585,13 +585,13 @@ var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons,
// Greater returns a mask whose elements indicate whether x > y // Greater returns a mask whose elements indicate whether x > y
// //
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} // Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}} {{- if eq .EWidth 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) signs := BroadcastInt{{.WxC}}(-1 << ({{.EWidth}}-1))
{{- else}} {{- else}}
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
signs := ones.ShiftAllLeft({{.Width}}-1) signs := ones.ShiftAllLeft({{.EWidth}}-1)
{{- end }} {{- end }}
return a.Xor(signs).Greater(b.Xor(signs)) return a.Xor(signs).Greater(b.Xor(signs))
} }
@ -599,13 +599,13 @@ func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
// Less returns a mask whose elements indicate whether x < y // Less returns a mask whose elements indicate whether x < y
// //
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} // Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .Width 8}} {{- if eq .EWidth 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) signs := BroadcastInt{{.WxC}}(-1 << ({{.EWidth}}-1))
{{- else}} {{- else}}
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
signs := ones.ShiftAllLeft({{.Width}}-1) signs := ones.ShiftAllLeft({{.EWidth}}-1)
{{- end }} {{- end }}
return b.Xor(signs).Greater(a.Xor(signs)) return b.Xor(signs).Greater(a.Xor(signs))
} }
@ -613,13 +613,13 @@ func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
// GreaterEqual returns a mask whose elements indicate whether x >= y // GreaterEqual returns a mask whose elements indicate whether x >= y
// //
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} // Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}} {{- if eq .EWidth 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) signs := BroadcastInt{{.WxC}}(-1 << ({{.EWidth}}-1))
{{- else}} {{- else}}
signs := ones.ShiftAllLeft({{.Width}}-1) signs := ones.ShiftAllLeft({{.EWidth}}-1)
{{- end }} {{- end }}
return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask() return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask()
} }
@ -627,13 +627,13 @@ func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
// LessEqual returns a mask whose elements indicate whether x <= y // LessEqual returns a mask whose elements indicate whether x <= y
// //
// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} // Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
{{- if eq .Width 8}} {{- if eq .EWidth 8}}
signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) signs := BroadcastInt{{.WxC}}(-1 << ({{.EWidth}}-1))
{{- else}} {{- else}}
signs := ones.ShiftAllLeft({{.Width}}-1) signs := ones.ShiftAllLeft({{.EWidth}}-1)
{{- end }} {{- end }}
return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask() return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).asMask()
} }
@ -641,7 +641,7 @@ func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
// NotEqual returns a mask whose elements indicate whether x != y // NotEqual returns a mask whose elements indicate whether x != y
// //
// Emulated, CPU Feature {{.CPUfeature}} // Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} { func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}()
return a.Equal(b).AsInt{{.WxC}}().Xor(ones).asMask() return a.Equal(b).AsInt{{.WxC}}().Xor(ones).asMask()
@ -649,27 +649,27 @@ func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
`) `)
var unsafePATemplate = templateOf("unsafe PA helper", ` var unsafePATemplate = templateOf("unsafe PA helper", `
// pa{{.Vec}} returns a type-unsafe pointer to array that can // pa{{.VType}} returns a type-unsafe pointer to array that can
// only be used with partial load/store operations that only // only be used with partial load/store operations that only
// access the known-safe portions of the array. // access the known-safe portions of the array.
func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} { func pa{{.VType}}(s []{{.Etype}}) *[{{.Count}}]{{.Etype}} {
return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0])) return (*[{{.Count}}]{{.Etype}})(unsafe.Pointer(&s[0]))
} }
`) `)
var avx2MaskedTemplate = shapedTemplateOf(avx2Shapes, "avx2 .Masked methods", ` var avx2MaskedTemplate = shapedTemplateOf(avx2Shapes, "avx2 .Masked methods", `
// Masked returns x but with elements zeroed where mask is false. // Masked returns x but with elements zeroed where mask is false.
func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} { func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} {
im := mask.AsInt{{.WxC}}() im := mask.AsInt{{.WxC}}()
{{- if eq .Base "Int" }} {{- if eq .Base "Int" }}
return im.And(x) return im.And(x)
{{- else}} {{- else}}
return x.AsInt{{.WxC}}().And(im).As{{.Vec}}() return x.AsInt{{.WxC}}().And(im).As{{.VType}}()
{{- end -}} {{- end -}}
} }
// Merge returns x but with elements set to y where mask is false. // Merge returns x but with elements set to y where mask is false.
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} { func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} {
{{- if eq .BxC .WxC -}} {{- if eq .BxC .WxC -}}
im := mask.AsInt{{.BxC}}() im := mask.AsInt{{.BxC}}()
{{- else}} {{- else}}
@ -680,7 +680,7 @@ func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
{{- else}} {{- else}}
ix := x.AsInt{{.BxC}}() ix := x.AsInt{{.BxC}}()
iy := y.AsInt{{.BxC}}() iy := y.AsInt{{.BxC}}()
return iy.blend(ix, im).As{{.Vec}}() return iy.blend(ix, im).As{{.VType}}()
{{- end -}} {{- end -}}
} }
`) `)
@ -688,23 +688,23 @@ func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
// TODO perhaps write these in ways that work better on AVX512 // TODO perhaps write these in ways that work better on AVX512
var avx512MaskedTemplate = shapedTemplateOf(avx512Shapes, "avx512 .Masked methods", ` var avx512MaskedTemplate = shapedTemplateOf(avx512Shapes, "avx512 .Masked methods", `
// Masked returns x but with elements zeroed where mask is false. // Masked returns x but with elements zeroed where mask is false.
func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} { func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} {
im := mask.AsInt{{.WxC}}() im := mask.AsInt{{.WxC}}()
{{- if eq .Base "Int" }} {{- if eq .Base "Int" }}
return im.And(x) return im.And(x)
{{- else}} {{- else}}
return x.AsInt{{.WxC}}().And(im).As{{.Vec}}() return x.AsInt{{.WxC}}().And(im).As{{.VType}}()
{{- end -}} {{- end -}}
} }
// Merge returns x but with elements set to y where m is false. // Merge returns x but with elements set to y where m is false.
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} { func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} {
{{- if eq .Base "Int" }} {{- if eq .Base "Int" }}
return y.blendMasked(x, mask) return y.blendMasked(x, mask)
{{- else}} {{- else}}
ix := x.AsInt{{.WxC}}() ix := x.AsInt{{.WxC}}()
iy := y.AsInt{{.WxC}}() iy := y.AsInt{{.WxC}}()
return iy.blendMasked(ix, mask).As{{.Vec}}() return iy.blendMasked(ix, mask).As{{.VType}}()
{{- end -}} {{- end -}}
} }
`) `)
@ -716,7 +716,7 @@ func (t templateData) CPUfeatureBC() string {
case 256: case 256:
return "AVX2" return "AVX2"
case 512: case 512:
if t.Width <= 16 { if t.EWidth <= 16 {
return "AVX512BW" return "AVX512BW"
} }
return "AVX512F" return "AVX512F"
@ -725,11 +725,11 @@ func (t templateData) CPUfeatureBC() string {
} }
var broadcastTemplate = templateOf("Broadcast functions", ` var broadcastTemplate = templateOf("Broadcast functions", `
// Broadcast{{.Vec}} returns a vector with the input // Broadcast{{.VType}} returns a vector with the input
// x assigned to all elements of the output. // x assigned to all elements of the output.
// //
// Emulated, CPU Feature {{.CPUfeatureBC}} // Emulated, CPU Feature {{.CPUfeatureBC}}
func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} { func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
var z {{.As128BitVec }} var z {{.As128BitVec }}
return z.SetElem(0, x).Broadcast{{.Vwidth}}() return z.SetElem(0, x).Broadcast{{.Vwidth}}()
} }