mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd: initialize directory to make it suitable for testing SIMD
this is a multistep operation between two repos to coordinate this move. First copy internal/simd top simd (and adjust so that it works with future generated SIMD), after this lands, update golang/arch/internal/simdgen to target this directory and add it to the end-to-end test (which will also be added once it works and is truly end-to-end), finally remove internal/simd once the updated generator has been submitted. Change-Id: If372baadc0c02e47cc32bc55b39ac19d551b2b21 Reviewed-on: https://go-review.googlesource.com/c/go/+/676955 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
11d2b28bff
commit
fdb067d946
2 changed files with 161 additions and 0 deletions
7
src/simd/dummy.s
Normal file
7
src/simd/dummy.s
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
// Copyright 2025 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build amd64
|
||||||
|
|
||||||
|
// Empty file to allow bodyless functions.
|
||||||
154
src/simd/testdata/sample.go
vendored
Normal file
154
src/simd/testdata/sample.go
vendored
Normal file
|
|
@ -0,0 +1,154 @@
|
||||||
|
// Copyright 2025 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"simd"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
func load(s []float64) simd.Float64x4 {
|
||||||
|
return simd.LoadFloat64x4((*[4]float64)(s[:4]))
|
||||||
|
}
|
||||||
|
|
||||||
|
type S1 = simd.Float64x4
|
||||||
|
|
||||||
|
type S2 simd.Float64x4
|
||||||
|
|
||||||
|
func (s S2) Len() int {
|
||||||
|
return simd.Float64x4(s).Len()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s S2) Load(a []float64) S2 {
|
||||||
|
return S2(load(a))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s S2) Store(a *[4]float64) {
|
||||||
|
simd.Float64x4(s).Store(a)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s S2) Add(a S2) S2 {
|
||||||
|
return S2(simd.Float64x4(s).Add(simd.Float64x4(a)))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s S2) Mul(a S2) S2 {
|
||||||
|
return S2(simd.Float64x4(s).Mul(simd.Float64x4(a)))
|
||||||
|
}
|
||||||
|
|
||||||
|
type S3 struct {
|
||||||
|
simd.Float64x4
|
||||||
|
}
|
||||||
|
|
||||||
|
func ip64_0(a, b []float64) float64 {
|
||||||
|
s := 0.0
|
||||||
|
for i := range a {
|
||||||
|
s += a[i] * b[i]
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func ip64_1(a, b []float64) float64 {
|
||||||
|
var z S1
|
||||||
|
sum := z
|
||||||
|
var i int
|
||||||
|
stride := z.Len()
|
||||||
|
for ; i <= len(a)-stride; i += stride {
|
||||||
|
va := load(a[i:])
|
||||||
|
vb := load(b[i:])
|
||||||
|
sum = sum.Add(va.Mul(vb))
|
||||||
|
}
|
||||||
|
var tmp [4]float64
|
||||||
|
sum.Store(&tmp)
|
||||||
|
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
func ip64_1a(a, b []float64) float64 {
|
||||||
|
var z S1
|
||||||
|
sum := z
|
||||||
|
var i int
|
||||||
|
stride := z.Len()
|
||||||
|
for ; i <= len(a)-stride; i += stride {
|
||||||
|
va := load(a[i:])
|
||||||
|
vb := load(b[i:])
|
||||||
|
sum = FMA(sum, va, vb)
|
||||||
|
}
|
||||||
|
var tmp [4]float64
|
||||||
|
sum.Store(&tmp)
|
||||||
|
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func FMA(a, b, c simd.Float64x4) simd.Float64x4 {
|
||||||
|
return a.Add(b.Mul(c))
|
||||||
|
}
|
||||||
|
|
||||||
|
func ip64_2(a, b []float64) float64 {
|
||||||
|
var z S2
|
||||||
|
sum := z
|
||||||
|
var i int
|
||||||
|
stride := z.Len()
|
||||||
|
for ; i <= len(a)-stride; i += stride {
|
||||||
|
va := z.Load(a[i:])
|
||||||
|
vb := z.Load(b[i:])
|
||||||
|
sum = sum.Add(va.Mul(vb))
|
||||||
|
}
|
||||||
|
var tmp [4]float64
|
||||||
|
sum.Store(&tmp)
|
||||||
|
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
func ip64_3(a, b []float64) float64 {
|
||||||
|
var z S3
|
||||||
|
sum := z
|
||||||
|
var i int
|
||||||
|
stride := z.Len()
|
||||||
|
for ; i <= len(a)-stride; i += stride {
|
||||||
|
va := load(a[i:])
|
||||||
|
vb := load(b[i:])
|
||||||
|
sum = S3{sum.Add(va.Mul(vb))}
|
||||||
|
}
|
||||||
|
var tmp [4]float64
|
||||||
|
sum.Store(&tmp)
|
||||||
|
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
a := []float64{1, 2, 3, 4, 5, 6, 7, 8}
|
||||||
|
ip0 := ip64_0(a, a)
|
||||||
|
ip1 := ip64_1(a, a)
|
||||||
|
ip1a := ip64_1a(a, a)
|
||||||
|
ip2 := ip64_2(a, a)
|
||||||
|
ip3 := ip64_3(a, a)
|
||||||
|
fmt.Printf("Test IP = %f\n", ip0)
|
||||||
|
fmt.Printf("SIMD IP 1 = %f\n", ip1)
|
||||||
|
fmt.Printf("SIMD IP 1a = %f\n", ip1a)
|
||||||
|
fmt.Printf("SIMD IP 2 = %f\n", ip2)
|
||||||
|
fmt.Printf("SIMD IP 3 = %f\n", ip3)
|
||||||
|
var z1 S1
|
||||||
|
var z2 S2
|
||||||
|
var z3 S2
|
||||||
|
|
||||||
|
s1, s2, s3 := unsafe.Sizeof(z1), unsafe.Sizeof(z2), unsafe.Sizeof(z3)
|
||||||
|
|
||||||
|
fmt.Printf("unsafe.Sizeof(z1, z2, z3)=%d, %d, %d\n", s1, s2, s3)
|
||||||
|
|
||||||
|
fail := false
|
||||||
|
|
||||||
|
if s1 != 32 || s2 != 32 || s3 != 32 {
|
||||||
|
fmt.Println("Failed a sizeof check, should all be 32")
|
||||||
|
fail = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if ip1 != ip0 || ip1a != ip0 || ip2 != ip0 || ip3 != ip0 {
|
||||||
|
fmt.Println("Failed an inner product check, should all be", ip0)
|
||||||
|
fail = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if fail {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue