// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ignore

package main

// this generates type-instantiated boilerplate code for
// slice operations and tests

import (
	"bufio"
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io"
	"os"
	"strings"
	"text/template"
)

// shapes describes a combination of vector widths and various element types.
// Each field lists bit widths; an empty (nil) list means that no element
// type of that kind is part of the shape set.
type shapes struct {
	vecs   []int // Vector bit width for this shape.
	ints   []int // Int element bit width(s) for this shape
	uints  []int // Unsigned int element bit width(s) for this shape
	floats []int // Float element bit width(s) for this shape
}

// shapeAndTemplate is a template and the set of shapes on which it will be expanded
type shapeAndTemplate struct {
	s *shapes
	t *template.Template
}

// allShapes lists every vector width (128, 256, 512) together with every
// signed, unsigned and floating-point element width.
var allShapes = &shapes{
	vecs:   []int{128, 256, 512},
	ints:   []int{8, 16, 32, 64},
	uints:  []int{8, 16, 32, 64},
	floats: []int{32, 64},
}

// these are the shapes that are currently converted to int32
// (not all conversions are available, yet)
var convert32Shapes = &shapes{
	vecs:   []int{128, 256, 512},
	floats: []int{32},
}

// avx512Shapes restricts allShapes to 512-bit vectors.
var avx512Shapes = &shapes{
	vecs:   []int{512},
	ints:   []int{8, 16, 32, 64},
	uints:  []int{8, 16, 32, 64},
	floats: []int{32, 64},
}

// avx2Shapes restricts allShapes to 128- and 256-bit vectors.
var avx2Shapes = &shapes{
	vecs:   []int{128, 256},
	ints:   []int{8, 16, 32, 64},
	uints:  []int{8, 16, 32, 64},
	floats: []int{32, 64},
}

// avx2MaskedLoadShapes is avx2Shapes limited to 32- and 64-bit elements.
// NOTE(review): presumably these are the element sizes for which masked
// loads and stores exist on 128/256-bit vectors; confirm against the simd API.
var avx2MaskedLoadShapes = &shapes{
	vecs:   []int{128, 256},
	ints:   []int{32, 64},
	uints:  []int{32, 64},
	floats: []int{32, 64},
}

// avx2SmallLoadPunShapes covers the 8- and 16-bit unsigned element types
// on 128- and 256-bit vectors.
var avx2SmallLoadPunShapes = &shapes{
	// ints are done by hand, these are type-punned to int.
	vecs:  []int{128, 256},
	uints: []int{8, 16},
}

// unaryFlaky lists the floating-point shapes (32- and 64-bit elements,
// all vector widths) that get a unary test helper with a flakiness parameter.
var unaryFlaky = &shapes{ // for tests that support flaky equality
	vecs:   []int{128, 256, 512},
	floats: []int{32, 64},
}

// ternaryFlaky lists the float32 shapes (all vector widths) that get a
// ternary test helper with a flakiness parameter.
var ternaryFlaky = &shapes{ // for tests that support flaky equality
	vecs:   []int{128, 256, 512},
	floats: []int{32},
}

// avx2SignedComparisons lists the signed integer shapes on 128- and
// 256-bit vectors.
var avx2SignedComparisons = &shapes{
	vecs: []int{128, 256},
	ints: []int{8, 16, 32, 64},
}

// avx2UnsignedComparisons lists the unsigned integer shapes on 128- and
// 256-bit vectors.
var avx2UnsignedComparisons = &shapes{
	vecs:  []int{128, 256},
	uints: []int{8, 16, 32, 64},
}

// templateData is the value handed to each template; the templates refer
// to its fields and methods as {{.Vec}}, {{.Count}}, {{.CPUfeature}}, etc.
type templateData struct {
	Vec    string // the type of the vector, e.g. Float32x4
	AOrAn  string // for documentation, the article "a" or "an"
	Width  int    // the bit width of the element type, e.g. 32
	Vwidth int    // the width of the vector type, e.g. 128
	Count  int    // the number of elements, e.g. 4
	WxC    string // the width-by-type string, e.g., "32x4"
	BxC    string // as if bytes, in the proper count, e.g., "8x16" (W==8)
	Base   string // the capitalized Base Type of the vector, e.g., "Float"
	Type   string // the element type, e.g. "float32"
	OxFF   string // a mask for the lowest 'count' bits
}

// As128BitVec returns the name of the 128-bit vector type that has the same
// base type and element width as t, e.g. "Float32x4" for any Float32 vector.
func (t templateData) As128BitVec() string {
	return fmt.Sprintf("%s%dx%d", t.Base, t.Width, 128/t.Width)
}

// oneTemplate derives the naming strings for the vector type made of count
// elements of type baseType with the given bit width. Combinations whose
// total width is below 128 or above 512 bits are silently skipped.
//
// NOTE(review): the remainder of this function is cut off in the visible
// source (the text breaks off inside the oxFF computation), so what is done
// with t and out cannot be documented from here.
func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer) {
	// b is the total vector width in bits.
	b := width * count
	if b < 128 || b > 512 {
		return
	}
	// Capitalize the first letter of the base type, e.g. "float" -> "Float".
	BaseType := strings.ToUpper(baseType[:1]) + baseType[1:]
	eType := fmt.Sprintf("%s%d", baseType, width)
	wxc := fmt.Sprintf("%dx%d", width, count)
	// The same vector viewed as bytes: 8-bit elements, count*(width/8) of them.
	bxc := fmt.Sprintf("%dx%d", 8, count*(width/8))
	vType := fmt.Sprintf("%s%s", BaseType, wxc)
	// Use "an" when the base type starts with a vowel letter.
	aOrAn := "a"
	if strings.Contains("aeiou", baseType[:1]) {
		aOrAn = "an"
	}
	oxFF := fmt.Sprintf("0x%x", uint64((1<x.
package simd_test

import (
	"simd"
	"testing"
)

`, s, t)
}

// curryTestPrologue binds the description t and returns a prologue
// function with the (s, out) signature that one expects; the returned
// function simply calls testPrologue(t, s, out).
func curryTestPrologue(t string) func(s string, out io.Writer) {
	return func(s string, out io.Writer) {
		testPrologue(t, s, out)
	}
}

// templateOf parses temp as a template called name and pairs it with
// allShapes. It panics (through template.Must) if temp does not parse.
func templateOf(name, temp string) shapeAndTemplate {
	return shapedTemplateOf(allShapes, name, temp)
}

// shapedTemplateOf parses temp as a template called name and pairs it with
// the shape set s. It panics (through template.Must) if temp does not parse.
func shapedTemplateOf(s *shapes, name, temp string) shapeAndTemplate {
	return shapeAndTemplate{s: s, t: template.Must(template.New(name).Parse(temp))}
}

// sliceTemplate generates the Load<Vec>Slice function and the StoreSlice
// method for every shape in allShapes.
var sliceTemplate = templateOf("slice", `
// Load{{.Vec}}Slice loads {{.AOrAn}} {{.Vec}} from a slice of at least {{.Count}} {{.Type}}s
func Load{{.Vec}}Slice(s []{{.Type}}) {{.Vec}} {
	return Load{{.Vec}}((*[{{.Count}}]{{.Type}})(s))
}

// StoreSlice stores x into a slice of at least {{.Count}} {{.Type}}s
func (x {{.Vec}}) StoreSlice(s []{{.Type}}) {
	x.Store((*[{{.Count}}]{{.Type}})(s))
}
`)

// unaryTemplate generates the test<Vec>Unary test helper for every shape
// in allShapes.
var unaryTemplate = templateOf("unary_helpers", `
// test{{.Vec}}Unary tests the simd unary method f against the expected behavior generated by want
func test{{.Vec}}Unary(t *testing.T, f func(_ simd.{{.Vec}}) simd.{{.Vec}}, want func(_ []{{.Type}}) []{{.Type}}) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]{{.Type}}, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)

// unaryFlakyTemplate generates the test<Vec>UnaryFlaky test helper for the
// shapes in unaryFlaky; it differs from unaryTemplate only in passing a
// caller-supplied flakiness to checkSlicesLogInput.
var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", `
// test{{.Vec}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}UnaryFlaky(t *testing.T, f func(x simd.{{.Vec}}) simd.{{.Vec}}, want func(x []{{.Type}}) []{{.Type}}, flakiness float64) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]{{.Type}}, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)

// unaryTemplateToInt32 generates the test<Vec>UnaryToInt32 test helper for
// the shapes in convert32Shapes. The generated doc comment now names the
// generated function (it previously said test<Vec>Unary).
var unaryTemplateToInt32 = shapedTemplateOf(convert32Shapes, "unary_int32_helpers", `
// test{{.Vec}}UnaryToInt32 tests the simd unary method f against the expected behavior generated by want
func test{{.Vec}}UnaryToInt32(t *testing.T, f func(x simd.{{.Vec}}) simd.Int32x{{.Count}}, want func(x []{{.Type}}) []int32) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]int32, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)

// unaryTemplateToUint32 generates the test<Vec>UnaryToUint32 test helper
// for the shapes in convert32Shapes. The generated doc comment now names
// the generated function (it previously said test<Vec>Unary).
var unaryTemplateToUint32 = shapedTemplateOf(convert32Shapes, "unary_uint32_helpers", `
// test{{.Vec}}UnaryToUint32 tests the simd unary method f against the expected behavior generated by want
func test{{.Vec}}UnaryToUint32(t *testing.T, f func(x simd.{{.Vec}}) simd.Uint32x{{.Count}}, want func(x []{{.Type}}) []uint32) {
	n := {{.Count}}
	t.Helper()
	forSlice(t, {{.Type}}s, n, func(x []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		g := make([]uint32, n)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
	})
}
`)

// binaryTemplate generates the test<Vec>Binary test helper for every shape
// in allShapes.
var binaryTemplate = templateOf("binary_helpers", `
// test{{.Vec}}Binary tests the simd binary method f against the expected behavior generated by want
func test{{.Vec}}Binary(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.{{.Vec}}, want func(_, _ []{{.Type}}) []{{.Type}}) {
	n := {{.Count}}
	t.Helper()
	forSlicePair(t, {{.Type}}s, n, func(x, y []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		g := make([]{{.Type}}, n)
		f(a, b).StoreSlice(g)
		w := want(x, y)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
	})
}
`)

// ternaryTemplate generates the test<Vec>Ternary test helper for every
// shape in allShapes.
var ternaryTemplate = templateOf("ternary_helpers", `
// test{{.Vec}}Ternary tests the simd ternary method f against the expected behavior generated by want
func test{{.Vec}}Ternary(t *testing.T, f func(_, _, _ simd.{{.Vec}}) simd.{{.Vec}}, want func(_, _, _ []{{.Type}}) []{{.Type}}) {
	n := {{.Count}}
	t.Helper()
	forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		c := simd.Load{{.Vec}}Slice(z)
		g := make([]{{.Type}}, n)
		f(a, b, c).StoreSlice(g)
		w := want(x, y, z)
		return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
	})
}
`)

// ternaryFlakyTemplate generates the test<Vec>TernaryFlaky test helper for
// the shapes in ternaryFlaky. It has its own template name so that template
// errors are not attributed to ternaryTemplate.
var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_flaky_helpers", `
// test{{.Vec}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.Vec}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.Vec}}) simd.{{.Vec}}, want func(x, y, z []{{.Type}}) []{{.Type}}, flakiness float64) {
	n := {{.Count}}
	t.Helper()
	forSliceTriple(t, {{.Type}}s, n, func(x, y, z []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		c := simd.Load{{.Vec}}Slice(z)
		g := make([]{{.Type}}, n)
		f(a, b, c).StoreSlice(g)
		w := want(x, y, z)
		return checkSlicesLogInput(t, g, w, flakiness, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("z=%v", z); })
	})
}
`)

// compareTemplate generates the test<Vec>Compare test helper for every
// shape in allShapes. The resulting mask is stored as integers of the
// element width and widened with s64 before being compared with want.
var compareTemplate = templateOf("compare_helpers", `
// test{{.Vec}}Compare tests the simd comparison method f against the expected behavior generated by want
func test{{.Vec}}Compare(t *testing.T, f func(_, _ simd.{{.Vec}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Type}}) []int64) {
	n := {{.Count}}
	t.Helper()
	forSlicePair(t, {{.Type}}s, n, func(x, y []{{.Type}}) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		g := make([]int{{.Width}}, n)
		f(a, b).AsInt{{.WxC}}().StoreSlice(g)
		w := want(x, y)
		return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); })
	})
}
`)

// compareMaskedTemplate generates the test<Vec>CompareMasked test helper
// for every shape in allShapes.
//
// TODO this has not been tested yet.
var compareMaskedTemplate = templateOf("comparemasked_helpers", `
// test{{.Vec}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func test{{.Vec}}CompareMasked(t *testing.T,
	f func(_, _ simd.{{.Vec}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}},
	want func(_, _ []{{.Type}}) []int64) {
	n := {{.Count}}
	t.Helper()
	forSlicePairMasked(t, {{.Type}}s, n, func(x, y []{{.Type}}, m []bool) bool {
		t.Helper()
		a := simd.Load{{.Vec}}Slice(x)
		b := simd.Load{{.Vec}}Slice(y)
		k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.Width}}](m)).AsMask{{.WxC}}()
		g := make([]int{{.Width}}, n)
		f(a, b, k).AsInt{{.WxC}}().StoreSlice(g)
		w := want(x, y)
		for i := range m {
			if !m[i] {
				w[i] = 0
			}
		}
		return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y); t.Logf("m=%v", m); })
	})
}
`)

// avx512MaskedLoadSlicePartTemplate generates Load<Vec>SlicePart and
// StoreSlicePart for the shapes in avx512Shapes. The generated code builds
// the mask by shifting the all-ones constant {{.OxFF}} right by the number
// of missing elements.
var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512Shapes, "avx 512 load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
	l := len(s)
	if l >= {{.Count}} {
		return Load{{.Vec}}Slice(s)
	}
	if l == 0 {
		var x {{.Vec}}
		return x
	}
	mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
	return LoadMasked{{.Vec}}(pa{{.Vec}}(s), mask)
}

// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
	l := len(s)
	if l >= {{.Count}} {
		x.StoreSlice(s)
		return
	}
	if l == 0 {
		return
	}
	mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l))
	x.StoreMasked(pa{{.Vec}}(s), mask)
}
`)

// avx2MaskedLoadSlicePartTemplate generates Load<Vec>SlicePart and
// StoreSlicePart for the shapes in avx2MaskedLoadShapes. The generated code
// takes its mask from a window of the vecMask<Width> table, which is not
// defined in this generator.
var avx2MaskedLoadSlicePartTemplate = shapedTemplateOf(avx2MaskedLoadShapes, "avx 2 load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
	l := len(s)
	if l >= {{.Count}} {
		return Load{{.Vec}}Slice(s)
	}
	if l == 0 {
		var x {{.Vec}}
		return x
	}
	mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:]
	return LoadMasked{{.Vec}}(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).AsMask{{.WxC}}())
}

// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
	l := len(s)
	if l >= {{.Count}} {
		x.StoreSlice(s)
		return
	}
	if l == 0 {
		return
	}
	mask := vecMask{{.Width}}[len(vecMask{{.Width}})/2-l:]
	x.StoreMasked(pa{{.Vec}}(s), LoadInt{{.WxC}}Slice(mask).AsMask{{.WxC}}())
}
`)

// avx2SmallLoadSlicePartTemplate generates Load<Vec>SlicePart and
// StoreSlicePart for the shapes in avx2SmallLoadPunShapes. The generated
// code reinterprets the slice as the signed element type of the same width
// and forwards to the signed implementation.
var avx2SmallLoadSlicePartTemplate = shapedTemplateOf(avx2SmallLoadPunShapes, "avx 2 small load slice part", `
// Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s.
// If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes.
// If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice.
func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} {
	if len(s) == 0 {
		var zero {{.Vec}}
		return zero
	}
	t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s))
	return LoadInt{{.WxC}}SlicePart(t).As{{.Vec}}()
}

// StoreSlicePart stores the {{.Count}} elements of x into the slice s.
// It stores as many elements as will fit in s.
// If s has {{.Count}} or more elements, the method is equivalent to x.StoreSlice.
func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
	if len(s) == 0 {
		return
	}
	t := unsafe.Slice((*int{{.Width}})(unsafe.Pointer(&s[0])), len(s))
	x.AsInt{{.WxC}}().StoreSlicePart(t)
}
`)

// CPUfeature returns the CPU feature name used in generated documentation
// for t's vector width: "AVX" for 128, "AVX2" for 256 and "AVX512" for 512
// bits. It panics on any other width.
func (t templateData) CPUfeature() string {
	switch t.Vwidth {
	case 128:
		return "AVX"
	case 256:
		return "AVX2"
	case 512:
		return "AVX512"
	}
	panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
}

// avx2SignedComparisonsTemplate generates Less, GreaterEqual, LessEqual and
// NotEqual for the shapes in avx2SignedComparisons. The generated methods
// are built from Greater and Equal; negation is an Xor with an all-ones
// vector obtained from x.Equal(x).
var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
// Less returns a mask whose elements indicate whether x < y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
	return y.Greater(x)
}

// GreaterEqual returns a mask whose elements indicate whether x >= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
	ones := x.Equal(x).AsInt{{.WxC}}()
	return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}

// LessEqual returns a mask whose elements indicate whether x <= y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
	ones := x.Equal(x).AsInt{{.WxC}}()
	return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}

// NotEqual returns a mask whose elements indicate whether x != y
//
// Emulated, CPU Feature {{.CPUfeature}}
func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
	ones := x.Equal(x).AsInt{{.WxC}}()
	return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
}
`)

// CPUfeatureAVX2if8 return AVX2 if the element width is 8,
// otherwise, it returns CPUfeature.
This is for the cpufeature // of unsigned comparison emulation, which uses shifts for all // the sizes > 8 (shifts are AVX) but must use broadcast (AVX2) // for bytes. func (t templateData) CPUfeatureAVX2if8() string { if t.Width == 8 { return "AVX2" } return t.CPUfeature() } var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", ` // Greater returns a mask whose elements indicate whether x > y // // Emulated, CPU Feature {{.CPUfeatureAVX2if8}} func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() {{- if eq .Width 8}} signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) {{- else}} ones := x.Equal(x).AsInt{{.WxC}}() signs := ones.ShiftAllLeft({{.Width}}-1) {{- end }} return a.Xor(signs).Greater(b.Xor(signs)) } // Less returns a mask whose elements indicate whether x < y // // Emulated, CPU Feature {{.CPUfeatureAVX2if8}} func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() {{- if eq .Width 8}} signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) {{- else}} ones := x.Equal(x).AsInt{{.WxC}}() signs := ones.ShiftAllLeft({{.Width}}-1) {{- end }} return b.Xor(signs).Greater(a.Xor(signs)) } // GreaterEqual returns a mask whose elements indicate whether x >= y // // Emulated, CPU Feature {{.CPUfeatureAVX2if8}} func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}() {{- if eq .Width 8}} signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) {{- else}} signs := ones.ShiftAllLeft({{.Width}}-1) {{- end }} return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() } // LessEqual returns a mask whose elements indicate whether x <= y // // Emulated, CPU Feature {{.CPUfeatureAVX2if8}} func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}() {{- if eq .Width 8}} signs 
:= BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1)) {{- else}} signs := ones.ShiftAllLeft({{.Width}}-1) {{- end }} return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() } // NotEqual returns a mask whose elements indicate whether x != y // // Emulated, CPU Feature {{.CPUfeature}} func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).AsInt{{.WxC}}() return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() } `) var unsafePATemplate = templateOf("unsafe PA helper", ` // pa{{.Vec}} returns a type-unsafe pointer to array that can // only be used with partial load/store operations that only // access the known-safe portions of the array. func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} { return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0])) } `) var avx2MaskedTemplate = shapedTemplateOf(avx2Shapes, "avx2 .Masked methods", ` // Masked returns x but with elements zeroed where mask is false. func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} { im := mask.AsInt{{.WxC}}() {{- if eq .Base "Int" }} return im.And(x) {{- else}} return x.AsInt{{.WxC}}().And(im).As{{.Vec}}() {{- end -}} } // Merge returns x but with elements set to y where mask is false. func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} { {{- if eq .BxC .WxC -}} im := mask.AsInt{{.BxC}}() {{- else}} im := mask.AsInt{{.WxC}}().AsInt{{.BxC}}() {{- end -}} {{- if and (eq .Base "Int") (eq .BxC .WxC) }} return y.blend(x, im) {{- else}} ix := x.AsInt{{.BxC}}() iy := y.AsInt{{.BxC}}() return iy.blend(ix, im).As{{.Vec}}() {{- end -}} } `) // TODO perhaps write these in ways that work better on AVX512 var avx512MaskedTemplate = shapedTemplateOf(avx512Shapes, "avx512 .Masked methods", ` // Masked returns x but with elements zeroed where mask is false. 
func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} { im := mask.AsInt{{.WxC}}() {{- if eq .Base "Int" }} return im.And(x) {{- else}} return x.AsInt{{.WxC}}().And(im).As{{.Vec}}() {{- end -}} } // Merge returns x but with elements set to y where m is false. func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} { {{- if eq .Base "Int" }} return y.blendMasked(x, mask) {{- else}} ix := x.AsInt{{.WxC}}() iy := y.AsInt{{.WxC}}() return iy.blendMasked(ix, mask).As{{.Vec}}() {{- end -}} } `) func (t templateData) CPUfeatureBC() string { switch t.Vwidth { case 128: return "AVX2" case 256: return "AVX2" case 512: if t.Width <= 16 { return "AVX512BW" } return "AVX512F" } panic(fmt.Errorf("unexpected vector width %d", t.Vwidth)) } var broadcastTemplate = templateOf("Broadcast functions", ` // Broadcast{{.Vec}} returns a vector with the input // x assigned to all elements of the output. // // Emulated, CPU Feature {{.CPUfeatureBC}} func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} { var z {{.As128BitVec }} return z.SetElem(0, x).Broadcast{{.Vwidth}}() } `) func main() { sl := flag.String("sl", "slice_amd64.go", "file name for slice operations") ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers") bh := flag.String("bh", "binary_helpers_test.go", "file name for binary test helpers") uh := flag.String("uh", "unary_helpers_test.go", "file name for unary test helpers") th := flag.String("th", "ternary_helpers_test.go", "file name for ternary test helpers") ch := flag.String("ch", "compare_helpers_test.go", "file name for compare test helpers") cmh := flag.String("cmh", "comparemasked_helpers_test.go", "file name for compare-masked test helpers") flag.Parse() if *sl != "" { one(*sl, prologue, sliceTemplate, avx512MaskedLoadSlicePartTemplate, avx2MaskedLoadSlicePartTemplate, avx2SmallLoadSlicePartTemplate, avx2MaskedTemplate, avx512MaskedTemplate, avx2SignedComparisonsTemplate, avx2UnsignedComparisonsTemplate, broadcastTemplate, ) } if *ush != "" { 
one(*ush, unsafePrologue, unsafePATemplate) } if *uh != "" { one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, unaryTemplateToInt32, unaryTemplateToUint32, unaryFlakyTemplate) } if *bh != "" { one(*bh, curryTestPrologue("binary simd methods"), binaryTemplate) } if *th != "" { one(*th, curryTestPrologue("ternary simd methods"), ternaryTemplate, ternaryFlakyTemplate) } if *ch != "" { one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate) } if *cmh != "" { one(*cmh, curryTestPrologue("simd methods that compare two operands under a mask"), compareMaskedTemplate) } } // numberLines takes a slice of bytes, and returns a string where each line // is numbered, starting from 1. func numberLines(data []byte) string { var buf bytes.Buffer r := bytes.NewReader(data) s := bufio.NewScanner(r) for i := 1; s.Scan(); i++ { fmt.Fprintf(&buf, "%d: %s\n", i, s.Text()) } return buf.String() } func one(filename string, prologue func(s string, out io.Writer), sats ...shapeAndTemplate) { if filename == "" { return } ofile := os.Stdout if filename != "-" { var err error ofile, err = os.Create(filename) if err != nil { fmt.Fprintf(os.Stderr, "Could not create the output file %s for the generated code, %v", filename, err) os.Exit(1) } } out := new(bytes.Buffer) prologue("go run genfiles.go", out) for _, sat := range sats { sat.forTemplates(out) } b, err := format.Source(out.Bytes()) if err != nil { fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v\n", filename, err) fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes())) fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code for %s, %v\n", filename, err) os.Exit(1) } else { ofile.Write(b) ofile.Close() } }