mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: track which CPU features are in scope
Adds an analysis that answers, for each block: (1) is this block only reached through feature checks? (2) does the function signature imply AVX-something? (3) is there an instruction in this block which implies AVX-something? It keeps track of which features those are. Features = AVX, AVX2, AVX512, etc. Has a test. Change-Id: I0b6f2e87d01ec587818db11cf71fac1e4d500650 Reviewed-on: https://go-review.googlesource.com/c/go/+/706337 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
48756abd3a
commit
d2270bccbd
6 changed files with 417 additions and 1 deletions
|
|
@ -18,6 +18,9 @@ type Block struct {
|
|||
// Source position for block's control operation
|
||||
Pos src.XPos
|
||||
|
||||
// What cpu features (AVXnnn, SVEyyy) are implied to reach/execute this block?
|
||||
CPUfeatures CPUfeatures
|
||||
|
||||
// The kind of block this is.
|
||||
Kind BlockKind
|
||||
|
||||
|
|
@ -449,3 +452,53 @@ const (
|
|||
HotPgoInitial = HotPgo | HotInitial // special case; single block loop, initial block is header block has a flow-in entry, but PGO says it is hot
|
||||
HotPgoInitialNotFLowIn = HotPgo | HotInitial | HotNotFlowIn // PGO says it is hot, and the loop is rotated so flow enters loop with a branch
|
||||
)
|
||||
|
||||
// CPUfeatures is a bit set of CPU instruction-set extensions; each set bit
// names one extension.
type CPUfeatures uint32

const (
	// CPUNone is the empty set.
	CPUNone CPUfeatures = 0
	// CPUAll has every bit set, so it is the identity for intersection (&).
	CPUAll CPUfeatures = ^CPUfeatures(0)

	// One bit per feature. iota is already 2 on the CPUavx line, so the
	// first feature occupies bit 2 (value 4) and bits 0 and 1 are unused.
	CPUavx CPUfeatures = 1 << iota
	CPUavx2
	CPUavxvnni
	CPUavx512
	CPUbitalg
	CPUgfni
	CPUvbmi
	CPUvbmi2
	CPUvpopcntdq
	CPUavx512vnni

	CPUneon
	CPUsve2
)

// String returns "none" for the empty set, "all" for CPUAll, and otherwise
// the names of the features present in f joined by "+", in a fixed order
// (avx, avx2, avx512, ...). Bits that have no named feature are not printed.
func (f CPUfeatures) String() string {
	switch f {
	case CPUNone:
		return "none"
	case CPUAll:
		return "all"
	}

	s := ""
	// add appends name to s, preceded by "+" when s is non-empty, if feat
	// is present in f.
	add := func(name string, feat CPUfeatures) {
		if f&feat == 0 {
			return
		}
		if s != "" {
			s += "+"
		}
		s += name
	}

	// The order here is the order of the output and is matched by test
	// expectations; it intentionally differs from the declaration order.
	add("avx", CPUavx)
	add("avx2", CPUavx2)
	add("avx512", CPUavx512)
	add("avxvnni", CPUavxvnni)
	add("bitalg", CPUbitalg)
	add("gfni", CPUgfni)
	add("vbmi", CPUvbmi)
	add("vbmi2", CPUvbmi2)
	add("popcntdq", CPUvpopcntdq)
	add("avx512vnni", CPUavx512vnni)
	add("neon", CPUneon)
	add("sve2", CPUsve2)

	return s
}
|
||||
|
|
|
|||
|
|
@ -485,6 +485,7 @@ var passes = [...]pass{
|
|||
{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
|
||||
{name: "insert resched checks", fn: insertLoopReschedChecks,
|
||||
disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
|
||||
{name: "cpufeatures", fn: cpufeatures, required: buildcfg.Experiment.SIMD, disabled: !buildcfg.Experiment.SIMD},
|
||||
{name: "lower", fn: lower, required: true},
|
||||
{name: "addressing modes", fn: addressingModes, required: false},
|
||||
{name: "late lower", fn: lateLower, required: true},
|
||||
|
|
@ -587,6 +588,8 @@ var passOrder = [...]constraint{
|
|||
{"branchelim", "late opt"},
|
||||
// branchelim is an arch-independent pass.
|
||||
{"branchelim", "lower"},
|
||||
// lower needs cpu feature information (for SIMD)
|
||||
{"cpufeatures", "lower"},
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
|
|
|||
261
src/cmd/compile/internal/ssa/cpufeatures.go
Normal file
261
src/cmd/compile/internal/ssa/cpufeatures.go
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ssa
|
||||
|
||||
import (
|
||||
"cmd/compile/internal/types"
|
||||
"cmd/internal/obj"
|
||||
"fmt"
|
||||
"internal/goarch"
|
||||
)
|
||||
|
||||
type localEffect struct {
|
||||
start CPUfeatures // features present at beginning of block
|
||||
internal CPUfeatures // features implied by execution of block
|
||||
end [2]CPUfeatures // for BlockIf, features present on outgoing edges
|
||||
visited bool // On the first iteration this will be false for backedges.
|
||||
}
|
||||
|
||||
func (e localEffect) String() string {
|
||||
return fmt.Sprintf("visited=%v, start=%v, internal=%v, end[0]=%v, end[1]=%v", e.visited, e.start, e.internal, e.end[0], e.end[1])
|
||||
}
|
||||
|
||||
// ifEffect pattern matches for a BlockIf conditional on a load
|
||||
// of a field from internal/cpu.X86 and returns the corresponding
|
||||
// effect.
|
||||
func ifEffect(b *Block) (features CPUfeatures, taken int) {
|
||||
// TODO generalize for other architectures.
|
||||
if b.Kind != BlockIf {
|
||||
return
|
||||
}
|
||||
c := b.Controls[0]
|
||||
|
||||
if c.Op == OpNot {
|
||||
taken = 1
|
||||
c = c.Args[0]
|
||||
}
|
||||
if c.Op != OpLoad {
|
||||
return
|
||||
}
|
||||
offPtr := c.Args[0]
|
||||
if offPtr.Op != OpOffPtr {
|
||||
return
|
||||
}
|
||||
addr := offPtr.Args[0]
|
||||
if addr.Op != OpAddr || addr.Args[0].Op != OpSB {
|
||||
return
|
||||
}
|
||||
sym := addr.Aux.(*obj.LSym)
|
||||
if sym.Name != "internal/cpu.X86" {
|
||||
return
|
||||
}
|
||||
o := offPtr.AuxInt
|
||||
t := addr.Type
|
||||
if !t.IsPtr() {
|
||||
b.Func.Fatalf("The symbol %s is not a pointer, found %v instead", sym.Name, t)
|
||||
}
|
||||
t = t.Elem()
|
||||
if !t.IsStruct() {
|
||||
b.Func.Fatalf("The referent of symbol %s is not a struct, found %v instead", sym.Name, t)
|
||||
}
|
||||
match := ""
|
||||
for _, f := range t.Fields() {
|
||||
if o == f.Offset && f.Sym != nil {
|
||||
match = f.Sym.Name
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
switch match {
|
||||
|
||||
case "HasAVX":
|
||||
features = CPUavx
|
||||
case "HasAVXVNNI":
|
||||
features = CPUavx | CPUavxvnni
|
||||
case "HasAVX2":
|
||||
features = CPUavx2 | CPUavx
|
||||
|
||||
// Compiler currently treats these all alike.
|
||||
case "HasAVX512", "HasAVX512F", "HasAVX512CD", "HasAVX512BW",
|
||||
"HasAVX512DQ", "HasAVX512VL", "HasAVX512VPCLMULQDQ":
|
||||
features = CPUavx512 | CPUavx2 | CPUavx
|
||||
|
||||
case "HasAVX512GFNI":
|
||||
features = CPUavx512 | CPUgfni | CPUavx2 | CPUavx
|
||||
case "HasAVX512VNNI":
|
||||
features = CPUavx512 | CPUavx512vnni | CPUavx2 | CPUavx
|
||||
case "HasAVX512VBMI":
|
||||
features = CPUavx512 | CPUvbmi | CPUavx2 | CPUavx
|
||||
case "HasAVX512VBMI2":
|
||||
features = CPUavx512 | CPUvbmi2 | CPUavx2 | CPUavx
|
||||
case "HasAVX512BITALG":
|
||||
features = CPUavx512 | CPUbitalg | CPUavx2 | CPUavx
|
||||
case "HasAVX512VPOPCNTDQ":
|
||||
features = CPUavx512 | CPUvpopcntdq | CPUavx2 | CPUavx
|
||||
|
||||
case "HasBMI1":
|
||||
features = CPUvbmi
|
||||
case "HasBMI2":
|
||||
features = CPUvbmi2
|
||||
|
||||
// Features that are not currently interesting to the compiler.
|
||||
case "HasAES", "HasADX", "HasERMS", "HasFSRM", "HasFMA", "HasGFNI", "HasOSXSAVE",
|
||||
"HasPCLMULQDQ", "HasPOPCNT", "HasRDTSCP", "HasSHA",
|
||||
"HasSSE3", "HasSSSE3", "HasSSE41", "HasSSE42":
|
||||
|
||||
}
|
||||
if b.Func.pass.debug > 2 {
|
||||
b.Func.Warnl(b.Pos, "%s, block b%v has features offset %d, match is %s, features is %v", b.Func.Name, b.ID, o, match, features)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func cpufeatures(f *Func) {
|
||||
arch := f.Config.Ctxt().Arch.Family
|
||||
// TODO there are other SIMD architectures
|
||||
if arch != goarch.AMD64 {
|
||||
return
|
||||
}
|
||||
|
||||
po := f.Postorder()
|
||||
|
||||
effects := make([]localEffect, 1+f.NumBlocks(), 1+f.NumBlocks())
|
||||
|
||||
features := func(t *types.Type) CPUfeatures {
|
||||
if t.IsSIMD() {
|
||||
switch t.Size() {
|
||||
case 16, 32:
|
||||
return CPUavx
|
||||
case 64:
|
||||
return CPUavx512 | CPUavx2 | CPUavx
|
||||
}
|
||||
}
|
||||
return CPUNone
|
||||
}
|
||||
|
||||
// visit blocks in reverse post order
|
||||
// when b is visited, all of its predecessors (except for loop back edges)
|
||||
// will have been visited
|
||||
for i := len(po) - 1; i >= 0; i-- {
|
||||
b := po[i]
|
||||
|
||||
var feat CPUfeatures
|
||||
|
||||
if b == f.Entry {
|
||||
// Check the types of inputs and outputs, as well as annotations.
|
||||
// Start with none and union all that is implied by all the types seen.
|
||||
if f.Type != nil { // a problem for SSA tests
|
||||
for _, field := range f.Type.RecvParamsResults() {
|
||||
feat |= features(field.Type)
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// Start with all and intersect over predecessors
|
||||
feat = CPUAll
|
||||
for _, p := range b.Preds {
|
||||
pb := p.Block()
|
||||
if !effects[pb.ID].visited {
|
||||
|
||||
continue
|
||||
}
|
||||
pi := p.Index()
|
||||
if pb.Kind != BlockIf {
|
||||
pi = 0
|
||||
}
|
||||
|
||||
feat &= effects[pb.ID].end[pi]
|
||||
}
|
||||
}
|
||||
|
||||
e := localEffect{start: feat, visited: true}
|
||||
|
||||
// Separately capture the internal effects of this block
|
||||
var internal CPUfeatures
|
||||
for _, v := range b.Values {
|
||||
// the rule applied here is, if the block contains any
|
||||
// instruction that would fault if the feature (avx, avx512)
|
||||
// were not present, then assume that the feature is present
|
||||
// for all the instructions in the block, a fault is a fault.
|
||||
t := v.Type
|
||||
if t.IsResults() {
|
||||
for i := 0; i < t.NumFields(); i++ {
|
||||
feat |= features(t.FieldType(i))
|
||||
}
|
||||
} else {
|
||||
internal |= features(v.Type)
|
||||
}
|
||||
}
|
||||
e.internal = internal
|
||||
feat |= internal
|
||||
|
||||
branchEffect, taken := ifEffect(b)
|
||||
e.end = [2]CPUfeatures{feat, feat}
|
||||
e.end[taken] |= branchEffect
|
||||
|
||||
effects[b.ID] = e
|
||||
if f.pass.debug > 1 && feat != CPUNone {
|
||||
f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, feat)
|
||||
}
|
||||
|
||||
b.CPUfeatures = feat
|
||||
}
|
||||
|
||||
// If the flow graph is irreducible, things can still change on backedges.
|
||||
change := true
|
||||
for change {
|
||||
change = false
|
||||
for i := len(po) - 1; i >= 0; i-- {
|
||||
b := po[i]
|
||||
|
||||
if b == f.Entry {
|
||||
continue // cannot change
|
||||
}
|
||||
feat := CPUAll
|
||||
for _, p := range b.Preds {
|
||||
pb := p.Block()
|
||||
pi := p.Index()
|
||||
if pb.Kind != BlockIf {
|
||||
pi = 0
|
||||
}
|
||||
feat &= effects[pb.ID].end[pi]
|
||||
}
|
||||
e := effects[b.ID]
|
||||
if feat == e.start {
|
||||
continue
|
||||
}
|
||||
e.start = feat
|
||||
effects[b.ID] = e
|
||||
// uh-oh, something changed
|
||||
if f.pass.debug > 1 {
|
||||
f.Warnl(b.Pos, "%s, block b%v saw predecessor feature change", b.Func.Name, b.ID)
|
||||
}
|
||||
|
||||
feat |= e.internal
|
||||
if feat == e.end[0]&e.end[1] {
|
||||
continue
|
||||
}
|
||||
|
||||
branchEffect, taken := ifEffect(b)
|
||||
e.end = [2]CPUfeatures{feat, feat}
|
||||
e.end[taken] |= branchEffect
|
||||
|
||||
effects[b.ID] = e
|
||||
b.CPUfeatures = feat
|
||||
if f.pass.debug > 1 {
|
||||
f.Warnl(b.Pos, "%s, block b%v has new features %v", b.Func.Name, b.ID, feat)
|
||||
}
|
||||
change = true
|
||||
}
|
||||
}
|
||||
if f.pass.debug > 0 {
|
||||
for _, b := range f.Blocks {
|
||||
if b.CPUfeatures != CPUNone {
|
||||
f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, b.CPUfeatures)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
|
|||
_64bit uintptr // size on 64bit platforms
|
||||
}{
|
||||
{Value{}, 72, 112},
|
||||
{Block{}, 164, 304},
|
||||
{Block{}, 168, 312},
|
||||
{LocalSlot{}, 28, 40},
|
||||
{valState{}, 28, 40},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -989,6 +989,7 @@ func (t *Type) ArgWidth() int64 {
|
|||
return t.extra.(*Func).Argwid
|
||||
}
|
||||
|
||||
// Size returns the width of t in bytes.
|
||||
func (t *Type) Size() int64 {
|
||||
if t.kind == TSSA {
|
||||
return t.width
|
||||
|
|
@ -997,6 +998,7 @@ func (t *Type) Size() int64 {
|
|||
return t.width
|
||||
}
|
||||
|
||||
// Alignment returns the alignment of t in bytes.
|
||||
func (t *Type) Alignment() int64 {
|
||||
CalcSize(t)
|
||||
return int64(t.align)
|
||||
|
|
|
|||
97
test/simd.go
Normal file
97
test/simd.go
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
// errorcheck -0 -d=ssa/cpufeatures/debug=1
|
||||
|
||||
//go:build goexperiment.simd && amd64
|
||||
|
||||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package foo
|
||||
|
||||
import "simd"
|
||||
|
||||
// f1 has a simd.Int8x16 parameter (presumably a 16-byte vector); the
// annotation on its return line expects the pass to report avx there.
func f1(x simd.Int8x16) {
|
||||
return // ERROR "has features avx"
|
||||
}
|
||||
|
||||
// g1 has a simd.Int8x16 result and no parameters; the annotation expects
// exactly avx to be reported on the return line.
func g1() simd.Int8x16 {
|
||||
var x simd.Int8x16
|
||||
return x // ERROR "has features avx$"
|
||||
}
|
||||
|
||||
type T1 simd.Int8x16
|
||||
|
||||
// h has a receiver whose type is defined from simd.Int8x16; the annotation
// expects exactly avx to be reported on the return line.
func (x T1) h() {
|
||||
return // ERROR "has features avx$"
|
||||
}
|
||||
|
||||
// f2 has a simd.Int8x64 parameter (presumably a 64-byte vector); the
// annotation expects avx, avx2 and avx512 to be reported on the return line.
func f2(x simd.Int8x64) {
|
||||
return // ERROR "has features avx[+]avx2[+]avx512$"
|
||||
}
|
||||
|
||||
// g2 has a simd.Int8x64 result and no parameters; the annotation expects
// avx, avx2 and avx512 to be reported on the return line.
func g2() simd.Int8x64 {
|
||||
var x simd.Int8x64
|
||||
return x // ERROR "has features avx[+]avx2[+]avx512$"
|
||||
}
|
||||
|
||||
type T2 simd.Int8x64
|
||||
|
||||
// h has a receiver whose type is defined from simd.Int8x64; the annotation
// expects avx, avx2 and avx512 to be reported on the return line.
func (x T2) h() {
|
||||
return // ERROR "has features avx[+]avx2[+]avx512$"
|
||||
}
|
||||
|
||||
var a int
|
||||
|
||||
// f guards one arm with an early return on !simd.HasAVX512() and the other
// with an early return on !simd.HasAVX2(). The annotations expect
// avx+avx2+avx512 after the first guard, avx+avx2 after the second, and
// only avx+avx2 at the closing brace, which follows both arms.
func f() {
|
||||
if a == 0 {
|
||||
if !simd.HasAVX512() {
|
||||
return
|
||||
}
|
||||
println("has avx512") // ERROR "has features avx[+]avx2[+]avx512$"
|
||||
} else {
|
||||
if !simd.HasAVX2() {
|
||||
return
|
||||
}
|
||||
println("has avx2") // ERROR "has features avx[+]avx2$"
|
||||
}
|
||||
println("has something")
|
||||
} // ERROR "has features avx[+]avx2$"
|
||||
|
||||
// g wraps a loop in a simd.HasAVX2() check; the annotations expect avx+avx2
// to be reported on the lines of the guarded loop. The annotation on the
// println line after the guarded region is described by the inline note
// there as an artifact of block numbering.
func g() {
|
||||
if simd.HasAVX2() { // ERROR "has features avx[+]avx2$"
|
||||
for range 5 { // ERROR "has features avx[+]avx2$"
|
||||
if a < 0 { // ERROR "has features avx[+]avx2$"
|
||||
a++ // ERROR "has features avx[+]avx2$"
|
||||
}
|
||||
}
|
||||
}
|
||||
println("ahoy!") // ERROR "has features avx[+]avx2$" // this is an artifact of flaky block numbering and why isn't it fused?
|
||||
if a > 0 {
|
||||
a--
|
||||
}
|
||||
}
|
||||
|
||||
// p always returns true. It is marked noinline, presumably so that the
// conditions built on it in hasIrreducibleLoop are not folded away.
//go:noinline
|
||||
func p() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// hasIrreducibleLoop uses gotos so that label a can be entered from the
// HasAVX2 arm and from b, while b can be entered from the other arm and by
// falling through from a. Judging by its name, it is meant to build an
// irreducible loop for the pass to handle. Only the "goto a" line, directly
// under the HasAVX2 check, carries an expected diagnostic.
func hasIrreducibleLoop() {
|
||||
if simd.HasAVX2() {
|
||||
goto a // ERROR "has features avx[+]avx2$"
|
||||
} else {
|
||||
goto b
|
||||
}
|
||||
a:
|
||||
println("a")
|
||||
if p() {
|
||||
goto c
|
||||
}
|
||||
b:
|
||||
println("b")
|
||||
if p() {
|
||||
goto a
|
||||
}
|
||||
c:
|
||||
println("c")
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue