[dev.simd] cmd/compile: track which CPU features are in scope

Add an SSA analysis pass that answers, for each block:

- is this block only reached through feature checks?
- does the function signature imply AVX-something?
- is there an instruction in this block which implies AVX-something?

and keep track of which features those are.  Features =
AVX, AVX2, AVX512, etc.

Has a test.

Change-Id: I0b6f2e87d01ec587818db11cf71fac1e4d500650
Reviewed-on: https://go-review.googlesource.com/c/go/+/706337
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
David Chase 2025-09-05 19:05:18 -04:00
parent 48756abd3a
commit d2270bccbd
6 changed files with 417 additions and 1 deletions

View file

@ -18,6 +18,9 @@ type Block struct {
// Source position for block's control operation
Pos src.XPos
// What cpu features (AVXnnn, SVEyyy) are implied to reach/execute this block?
CPUfeatures CPUfeatures
// The kind of block this is.
Kind BlockKind
@ -449,3 +452,53 @@ const (
HotPgoInitial = HotPgo | HotInitial // special case; single block loop, initial block is header block has a flow-in entry, but PGO says it is hot
HotPgoInitialNotFLowIn = HotPgo | HotInitial | HotNotFlowIn // PGO says it is hot, and the loop is rotated so flow enters loop with a branch
)
// CPUfeatures is a bit set of CPU instruction-set extensions
// (AVXnnn, SVEyyy) that may be assumed to be available.
type CPUfeatures uint32

const (
	// CPUNone is the empty feature set.
	CPUNone CPUfeatures = 0
	// CPUAll has every bit set; intersecting with it leaves a set unchanged.
	CPUAll CPUfeatures = ^CPUfeatures(0)

	// Individual feature bits. iota is already 2 on the CPUavx line
	// (CPUNone and CPUAll occupy the first two specs of this block), so
	// CPUavx is 1<<2 and the two lowest bits are unused.
	CPUavx CPUfeatures = 1 << iota
	CPUavx2
	CPUavxvnni
	CPUavx512
	CPUbitalg
	CPUgfni
	CPUvbmi
	CPUvbmi2
	CPUvpopcntdq
	CPUavx512vnni
	CPUneon
	CPUsve2
)

// String returns "none" for the empty set, "all" for CPUAll, and otherwise
// the names of the known features present in f, joined with "+".
func (f CPUfeatures) String() string {
	switch f {
	case CPUNone:
		return "none"
	case CPUAll:
		return "all"
	}

	s := ""
	// add appends name to s (with a "+" separator) if feat is present in f.
	add := func(name string, feat CPUfeatures) {
		if f&feat == 0 {
			return
		}
		if s != "" {
			s += "+"
		}
		s += name
	}

	// The order below is the order in which names appear in the output.
	add("avx", CPUavx)
	add("avx2", CPUavx2)
	add("avx512", CPUavx512)
	add("avxvnni", CPUavxvnni)
	add("bitalg", CPUbitalg)
	add("gfni", CPUgfni)
	add("vbmi", CPUvbmi)
	add("vbmi2", CPUvbmi2)
	add("popcntdq", CPUvpopcntdq)
	add("avx512vnni", CPUavx512vnni)
	// Every declared feature bit needs a name here; otherwise a set
	// containing only that bit would print as the empty string.
	add("neon", CPUneon)
	add("sve2", CPUsve2)
	return s
}

View file

@ -485,6 +485,7 @@ var passes = [...]pass{
{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
{name: "insert resched checks", fn: insertLoopReschedChecks,
disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
{name: "cpufeatures", fn: cpufeatures, required: buildcfg.Experiment.SIMD, disabled: !buildcfg.Experiment.SIMD},
{name: "lower", fn: lower, required: true},
{name: "addressing modes", fn: addressingModes, required: false},
{name: "late lower", fn: lateLower, required: true},
@ -587,6 +588,8 @@ var passOrder = [...]constraint{
{"branchelim", "late opt"},
// branchelim is an arch-independent pass.
{"branchelim", "lower"},
// lower needs cpu feature information (for SIMD)
{"cpufeatures", "lower"},
}
func init() {

View file

@ -0,0 +1,261 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"fmt"
"internal/goarch"
)
// localEffect records, for one block, the CPU feature sets computed by the
// cpufeatures pass: what holds on entry, what the block's own values imply,
// and what holds on each outgoing edge.
type localEffect struct {
start CPUfeatures // features present at beginning of block
internal CPUfeatures // features implied by execution of block
end [2]CPUfeatures // for BlockIf, features present on outgoing edges; other block kinds are read through end[0]
visited bool // On the first iteration this will be false for backedges.
}
// String formats every field of e on a single line, for debugging output.
func (e localEffect) String() string {
	const layout = "visited=%v, start=%v, internal=%v, end[0]=%v, end[1]=%v"
	onFirst, onSecond := e.end[0], e.end[1]
	return fmt.Sprintf(layout, e.visited, e.start, e.internal, onFirst, onSecond)
}
// ifEffect pattern matches for a BlockIf conditional on a load
// of a field from internal/cpu.X86 and returns the corresponding
// effect.
//
// features is the set of CPU features implied when the tested field is
// true. taken is the index (0 or 1) of the outgoing edge on which that
// holds: 0 for a direct test, 1 when the control value is wrapped in an
// OpNot. If b does not match the pattern, features is CPUNone, so the
// caller's union with it is a no-op whatever taken is.
func ifEffect(b *Block) (features CPUfeatures, taken int) {
	// TODO generalize for other architectures.
	if b.Kind != BlockIf {
		return
	}
	c := b.Controls[0]

	// A negated test means the features hold on the other edge.
	if c.Op == OpNot {
		taken = 1
		c = c.Args[0]
	}
	if c.Op != OpLoad {
		return
	}

	// Expect Load(OffPtr [offset] (Addr {sym} SB)).
	offPtr := c.Args[0]
	if offPtr.Op != OpOffPtr {
		return
	}
	addr := offPtr.Args[0]
	if addr.Op != OpAddr || addr.Args[0].Op != OpSB {
		return
	}
	sym := addr.Aux.(*obj.LSym)
	if sym.Name != "internal/cpu.X86" {
		return
	}

	o := offPtr.AuxInt
	t := addr.Type
	if !t.IsPtr() {
		b.Func.Fatalf("The symbol %s is not a pointer, found %v instead", sym.Name, t)
	}
	t = t.Elem()
	if !t.IsStruct() {
		b.Func.Fatalf("The referent of symbol %s is not a struct, found %v instead", sym.Name, t)
	}

	// Translate the load offset back into the name of the struct field
	// being tested.
	match := ""
	for _, f := range t.Fields() {
		if o == f.Offset && f.Sym != nil {
			match = f.Sym.Name
			break
		}
	}

	// Each case lists the tested feature together with the older
	// features it is treated as implying.
	switch match {
	case "HasAVX":
		features = CPUavx
	case "HasAVXVNNI":
		features = CPUavx | CPUavxvnni
	case "HasAVX2":
		features = CPUavx2 | CPUavx

	// Compiler currently treats these all alike.
	case "HasAVX512", "HasAVX512F", "HasAVX512CD", "HasAVX512BW",
		"HasAVX512DQ", "HasAVX512VL", "HasAVX512VPCLMULQDQ":
		features = CPUavx512 | CPUavx2 | CPUavx

	case "HasAVX512GFNI":
		features = CPUavx512 | CPUgfni | CPUavx2 | CPUavx
	case "HasAVX512VNNI":
		features = CPUavx512 | CPUavx512vnni | CPUavx2 | CPUavx
	case "HasAVX512VBMI":
		features = CPUavx512 | CPUvbmi | CPUavx2 | CPUavx
	case "HasAVX512VBMI2":
		features = CPUavx512 | CPUvbmi2 | CPUavx2 | CPUavx
	case "HasAVX512BITALG":
		features = CPUavx512 | CPUbitalg | CPUavx2 | CPUavx
	case "HasAVX512VPOPCNTDQ":
		features = CPUavx512 | CPUvpopcntdq | CPUavx2 | CPUavx

	// Features that are not currently interesting to the compiler.
	// HasBMI1 and HasBMI2 belong here: they are the scalar
	// bit-manipulation extensions, not AVX-512 VBMI/VBMI2, so they must
	// not set CPUvbmi or CPUvbmi2.
	case "HasBMI1", "HasBMI2",
		"HasAES", "HasADX", "HasERMS", "HasFSRM", "HasFMA", "HasGFNI", "HasOSXSAVE",
		"HasPCLMULQDQ", "HasPOPCNT", "HasRDTSCP", "HasSHA",
		"HasSSE3", "HasSSSE3", "HasSSE41", "HasSSE42":
	}

	if b.Func.pass.debug > 2 {
		b.Func.Warnl(b.Pos, "%s, block b%v has features offset %d, match is %s, features is %v", b.Func.Name, b.ID, o, match, features)
	}
	return
}
// cpufeatures computes, for each block in f.Postorder(), the set of CPU
// features that may be assumed while the block executes, and stores it in
// the block's CPUfeatures field.
//
// A block's set is the intersection of the sets on its incoming edges
// (for the entry block: the union of what the function's receiver,
// parameter and result types imply), plus whatever the types of the
// block's own values imply. An edge out of a BlockIf that tests an
// internal/cpu.X86 field additionally carries the tested features (see
// ifEffect). The pass currently does nothing unless the target is AMD64.
func cpufeatures(f *Func) {
	arch := f.Config.Ctxt().Arch.Family
	// TODO there are other SIMD architectures
	if arch != goarch.AMD64 {
		return
	}

	po := f.Postorder()

	// effects is indexed by block ID.
	effects := make([]localEffect, 1+f.NumBlocks())

	// features returns the CPU features implied by the presence of a
	// value of type t, based only on the size of SIMD types.
	features := func(t *types.Type) CPUfeatures {
		if t.IsSIMD() {
			switch t.Size() {
			case 16, 32:
				return CPUavx
			case 64:
				return CPUavx512 | CPUavx2 | CPUavx
			}
		}
		return CPUNone
	}

	// visit blocks in reverse post order
	// when b is visited, all of its predecessors (except for loop back edges)
	// will have been visited
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]

		var feat CPUfeatures

		if b == f.Entry {
			// Check the types of inputs and outputs, as well as annotations.
			// Start with none and union all that is implied by all the types seen.
			if f.Type != nil { // a problem for SSA tests
				for _, field := range f.Type.RecvParamsResults() {
					feat |= features(field.Type)
				}
			}
		} else {
			// Start with all and intersect over predecessors
			feat = CPUAll
			for _, p := range b.Preds {
				pb := p.Block()
				if !effects[pb.ID].visited {
					// Not computed yet (a back edge); the fixed-point
					// loop below accounts for it.
					continue
				}
				// Only BlockIf distinguishes its two outgoing edges;
				// every other kind is read through end[0].
				pi := p.Index()
				if pb.Kind != BlockIf {
					pi = 0
				}
				feat &= effects[pb.ID].end[pi]
			}
		}

		e := localEffect{start: feat, visited: true}

		// Separately capture the internal effects of this block
		var internal CPUfeatures
		for _, v := range b.Values {
			// the rule applied here is, if the block contains any
			// instruction that would fault if the feature (avx, avx512)
			// were not present, then assume that the feature is present
			// for all the instructions in the block, a fault is a fault.
			t := v.Type
			if t.IsResults() {
				// Result tuples contribute through each of their fields.
				// These go into internal, like every other value, so
				// that e.internal is complete and the fixed-point loop
				// below re-applies them when it recomputes this block.
				for j := 0; j < t.NumFields(); j++ {
					internal |= features(t.FieldType(j))
				}
			} else {
				internal |= features(t)
			}
		}
		e.internal = internal
		feat |= internal

		// Both outgoing edges carry feat; the edge selected by a CPU
		// feature test additionally carries the tested features.
		branchEffect, taken := ifEffect(b)
		e.end = [2]CPUfeatures{feat, feat}
		e.end[taken] |= branchEffect

		effects[b.ID] = e
		if f.pass.debug > 1 && feat != CPUNone {
			f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, feat)
		}
		b.CPUfeatures = feat
	}

	// If the flow graph is irreducible, things can still change on backedges.
	change := true
	for change {
		change = false
		for i := len(po) - 1; i >= 0; i-- {
			b := po[i]

			if b == f.Entry {
				continue // cannot change
			}

			// Recompute the entry set, this time over all predecessors.
			feat := CPUAll
			for _, p := range b.Preds {
				pb := p.Block()
				pi := p.Index()
				if pb.Kind != BlockIf {
					pi = 0
				}
				feat &= effects[pb.ID].end[pi]
			}
			e := effects[b.ID]
			if feat == e.start {
				continue
			}
			e.start = feat
			effects[b.ID] = e
			// uh-oh, something changed
			if f.pass.debug > 1 {
				f.Warnl(b.Pos, "%s, block b%v saw predecessor feature change", b.Func.Name, b.ID)
			}

			feat |= e.internal
			// end[0]&end[1] is the block's previous feature set: the
			// two entries differ only by the branch effect added to one
			// of them. If it is unchanged, successors are unaffected.
			if feat == e.end[0]&e.end[1] {
				continue
			}

			branchEffect, taken := ifEffect(b)
			e.end = [2]CPUfeatures{feat, feat}
			e.end[taken] |= branchEffect

			effects[b.ID] = e
			b.CPUfeatures = feat
			if f.pass.debug > 1 {
				f.Warnl(b.Pos, "%s, block b%v has new features %v", b.Func.Name, b.ID, feat)
			}
			change = true
		}
	}
	if f.pass.debug > 0 {
		for _, b := range f.Blocks {
			if b.CPUfeatures != CPUNone {
				f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, b.CPUfeatures)
			}
		}
	}
}

View file

@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
_64bit uintptr // size on 64bit platforms
}{
{Value{}, 72, 112},
{Block{}, 164, 304},
{Block{}, 168, 312},
{LocalSlot{}, 28, 40},
{valState{}, 28, 40},
}

View file

@ -989,6 +989,7 @@ func (t *Type) ArgWidth() int64 {
return t.extra.(*Func).Argwid
}
// Size returns the width of t in bytes.
func (t *Type) Size() int64 {
if t.kind == TSSA {
return t.width
@ -997,6 +998,7 @@ func (t *Type) Size() int64 {
return t.width
}
// Alignment returns the alignment of t in bytes.
func (t *Type) Alignment() int64 {
CalcSize(t)
return int64(t.align)

97
test/simd.go Normal file
View file

@ -0,0 +1,97 @@
// errorcheck -0 -d=ssa/cpufeatures/debug=1
//go:build goexperiment.simd && amd64
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package foo
import "simd"
// f1 takes a simd.Int8x16 parameter; the annotation on its return expects
// the pass to report avx.
// NOTE(review): unlike the other expectations in this file, this pattern
// is not anchored with "$", so it would also match a longer feature list
// such as avx+avx2 - confirm whether that is intended.
func f1(x simd.Int8x16) {
return // ERROR "has features avx"
}
// g1 returns a simd.Int8x16; the annotation on its return expects the
// pass to report exactly avx.
func g1() simd.Int8x16 {
var x simd.Int8x16
return x // ERROR "has features avx$"
}
// T1 is a named type whose underlying type is simd.Int8x16.
type T1 simd.Int8x16
// h has a T1 value receiver; the annotation on its return expects the
// pass to report exactly avx.
func (x T1) h() {
return // ERROR "has features avx$"
}
// f2 takes a simd.Int8x64 parameter; the annotation on its return expects
// the pass to report avx+avx2+avx512.
func f2(x simd.Int8x64) {
return // ERROR "has features avx[+]avx2[+]avx512$"
}
// g2 returns a simd.Int8x64; the annotation on its return expects the
// pass to report avx+avx2+avx512.
func g2() simd.Int8x64 {
var x simd.Int8x64
return x // ERROR "has features avx[+]avx2[+]avx512$"
}
// T2 is a named type whose underlying type is simd.Int8x64.
type T2 simd.Int8x64
// h has a T2 value receiver; the annotation on its return expects the
// pass to report avx+avx2+avx512.
func (x T2) h() {
return // ERROR "has features avx[+]avx2[+]avx512$"
}
// a is package-level state read by f and g (and modified by g) to give
// them branch conditions.
var a int
// f guards one branch with simd.HasAVX512 and the other with simd.HasAVX2,
// returning early when the check fails. The annotations expect
// avx+avx2+avx512 after the first check, avx+avx2 after the second, and
// avx+avx2 on the function's closing brace, after the two branches rejoin.
func f() {
if a == 0 {
if !simd.HasAVX512() {
return
}
println("has avx512") // ERROR "has features avx[+]avx2[+]avx512$"
} else {
if !simd.HasAVX2() {
return
}
println("has avx2") // ERROR "has features avx[+]avx2$"
}
println("has something")
} // ERROR "has features avx[+]avx2$"
// g runs a loop containing a conditional inside a simd.HasAVX2 check. The
// annotations expect avx+avx2 on the check, the loop header, the inner
// condition and the increment, and also on the println after the guarded
// region (see the existing remark on that line).
func g() {
if simd.HasAVX2() { // ERROR "has features avx[+]avx2$"
for range 5 { // ERROR "has features avx[+]avx2$"
if a < 0 { // ERROR "has features avx[+]avx2$"
a++ // ERROR "has features avx[+]avx2$"
}
}
}
println("ahoy!") // ERROR "has features avx[+]avx2$" // this is an artifact of flaky block numbering and why isn't it fused?
if a > 0 {
a--
}
}
// p always returns true. It is marked noinline, presumably so that the
// conditions in hasIrreducibleLoop are not resolved at compile time -
// confirm.
//go:noinline
func p() bool {
return true
}
// hasIrreducibleLoop uses gotos to build a cycle between labels a and b
// that is entered at a when simd.HasAVX2 is true and at b otherwise, so
// the cycle has two entry points. The only annotation is on the "goto a"
// statement, which expects avx+avx2.
func hasIrreducibleLoop() {
if simd.HasAVX2() {
goto a // ERROR "has features avx[+]avx2$"
} else {
goto b
}
a:
println("a")
if p() {
goto c
}
b:
println("b")
if p() {
goto a
}
c:
println("c")
}