go/src/cmd/compile/internal/ssa/cpufeatures.go

262 lines
6.3 KiB
Go
Raw Normal View History

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"fmt"
"internal/goarch"
)
type localEffect struct {
start CPUfeatures // features present at beginning of block
internal CPUfeatures // features implied by execution of block
end [2]CPUfeatures // for BlockIf, features present on outgoing edges
visited bool // On the first iteration this will be false for backedges.
}
func (e localEffect) String() string {
return fmt.Sprintf("visited=%v, start=%v, internal=%v, end[0]=%v, end[1]=%v", e.visited, e.start, e.internal, e.end[0], e.end[1])
}
// ifEffect pattern matches for a BlockIf conditional on a load
// of a field from internal/cpu.X86 and returns the corresponding
// effect.
func ifEffect(b *Block) (features CPUfeatures, taken int) {
// TODO generalize for other architectures.
if b.Kind != BlockIf {
return
}
c := b.Controls[0]
if c.Op == OpNot {
taken = 1
c = c.Args[0]
}
if c.Op != OpLoad {
return
}
offPtr := c.Args[0]
if offPtr.Op != OpOffPtr {
return
}
addr := offPtr.Args[0]
if addr.Op != OpAddr || addr.Args[0].Op != OpSB {
return
}
sym := addr.Aux.(*obj.LSym)
if sym.Name != "internal/cpu.X86" {
return
}
o := offPtr.AuxInt
t := addr.Type
if !t.IsPtr() {
b.Func.Fatalf("The symbol %s is not a pointer, found %v instead", sym.Name, t)
}
t = t.Elem()
if !t.IsStruct() {
b.Func.Fatalf("The referent of symbol %s is not a struct, found %v instead", sym.Name, t)
}
match := ""
for _, f := range t.Fields() {
if o == f.Offset && f.Sym != nil {
match = f.Sym.Name
break
}
}
switch match {
case "HasAVX":
features = CPUavx
case "HasAVXVNNI":
features = CPUavx | CPUavxvnni
case "HasAVX2":
features = CPUavx2 | CPUavx
// Compiler currently treats these all alike.
case "HasAVX512", "HasAVX512F", "HasAVX512CD", "HasAVX512BW",
"HasAVX512DQ", "HasAVX512VL", "HasAVX512VPCLMULQDQ":
features = CPUavx512 | CPUavx2 | CPUavx
case "HasAVX512GFNI":
features = CPUavx512 | CPUgfni | CPUavx2 | CPUavx
case "HasAVX512VNNI":
features = CPUavx512 | CPUavx512vnni | CPUavx2 | CPUavx
case "HasAVX512VBMI":
features = CPUavx512 | CPUvbmi | CPUavx2 | CPUavx
case "HasAVX512VBMI2":
features = CPUavx512 | CPUvbmi2 | CPUavx2 | CPUavx
case "HasAVX512BITALG":
features = CPUavx512 | CPUbitalg | CPUavx2 | CPUavx
case "HasAVX512VPOPCNTDQ":
features = CPUavx512 | CPUvpopcntdq | CPUavx2 | CPUavx
case "HasBMI1":
features = CPUvbmi
case "HasBMI2":
features = CPUvbmi2
// Features that are not currently interesting to the compiler.
case "HasAES", "HasADX", "HasERMS", "HasFSRM", "HasFMA", "HasGFNI", "HasOSXSAVE",
"HasPCLMULQDQ", "HasPOPCNT", "HasRDTSCP", "HasSHA",
"HasSSE3", "HasSSSE3", "HasSSE41", "HasSSE42":
}
if b.Func.pass.debug > 2 {
b.Func.Warnl(b.Pos, "%s, block b%v has features offset %d, match is %s, features is %v", b.Func.Name, b.ID, o, match, features)
}
return
}
func cpufeatures(f *Func) {
arch := f.Config.Ctxt().Arch.Family
// TODO there are other SIMD architectures
if arch != goarch.AMD64 {
return
}
po := f.Postorder()
effects := make([]localEffect, 1+f.NumBlocks(), 1+f.NumBlocks())
features := func(t *types.Type) CPUfeatures {
if t.IsSIMD() {
switch t.Size() {
case 16, 32:
return CPUavx
case 64:
return CPUavx512 | CPUavx2 | CPUavx
}
}
return CPUNone
}
// visit blocks in reverse post order
// when b is visited, all of its predecessors (except for loop back edges)
// will have been visited
for i := len(po) - 1; i >= 0; i-- {
b := po[i]
var feat CPUfeatures
if b == f.Entry {
// Check the types of inputs and outputs, as well as annotations.
// Start with none and union all that is implied by all the types seen.
if f.Type != nil { // a problem for SSA tests
for _, field := range f.Type.RecvParamsResults() {
feat |= features(field.Type)
}
}
} else {
// Start with all and intersect over predecessors
feat = CPUAll
for _, p := range b.Preds {
pb := p.Block()
if !effects[pb.ID].visited {
continue
}
pi := p.Index()
if pb.Kind != BlockIf {
pi = 0
}
feat &= effects[pb.ID].end[pi]
}
}
e := localEffect{start: feat, visited: true}
// Separately capture the internal effects of this block
var internal CPUfeatures
for _, v := range b.Values {
// the rule applied here is, if the block contains any
// instruction that would fault if the feature (avx, avx512)
// were not present, then assume that the feature is present
// for all the instructions in the block, a fault is a fault.
t := v.Type
if t.IsResults() {
for i := 0; i < t.NumFields(); i++ {
feat |= features(t.FieldType(i))
}
} else {
internal |= features(v.Type)
}
}
e.internal = internal
feat |= internal
branchEffect, taken := ifEffect(b)
e.end = [2]CPUfeatures{feat, feat}
e.end[taken] |= branchEffect
effects[b.ID] = e
if f.pass.debug > 1 && feat != CPUNone {
f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, feat)
}
b.CPUfeatures = feat
}
// If the flow graph is irreducible, things can still change on backedges.
change := true
for change {
change = false
for i := len(po) - 1; i >= 0; i-- {
b := po[i]
if b == f.Entry {
continue // cannot change
}
feat := CPUAll
for _, p := range b.Preds {
pb := p.Block()
pi := p.Index()
if pb.Kind != BlockIf {
pi = 0
}
feat &= effects[pb.ID].end[pi]
}
e := effects[b.ID]
if feat == e.start {
continue
}
e.start = feat
effects[b.ID] = e
// uh-oh, something changed
if f.pass.debug > 1 {
f.Warnl(b.Pos, "%s, block b%v saw predecessor feature change", b.Func.Name, b.ID)
}
feat |= e.internal
if feat == e.end[0]&e.end[1] {
continue
}
branchEffect, taken := ifEffect(b)
e.end = [2]CPUfeatures{feat, feat}
e.end[taken] |= branchEffect
effects[b.ID] = e
b.CPUfeatures = feat
if f.pass.debug > 1 {
f.Warnl(b.Pos, "%s, block b%v has new features %v", b.Func.Name, b.ID, feat)
}
change = true
}
}
if f.pass.debug > 0 {
for _, b := range f.Blocks {
if b.CPUfeatures != CPUNone {
f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, b.CPUfeatures)
}
}
}
}