simd/archsimd/_gen/simdgen: feature implications

This simplifies our handling of XED features, adds a table of which
features imply which other features, and adds this information to the
documentation of the CPU features APIs.

As part of this we fix an issue around the "AVXAES" feature. AVXAES is
defined as the combination of the AVX and AES CPUID flags. Several
other features also work like this, but have hand-written logic in
internal/cpu to compute logical feature flags from the underlying
CPUID bits. For these, we expose a single feature check function from
the SIMD API.

AVXAES currently doesn't work like this: it requires the user to check
both features. However, this forces the SIMD API to expose an "AES"
feature check, which really has nothing to do with SIMD. To make this
consistent, we introduce an AVXAES feature check function and use it
in feature requirement docs. Unlike the other combo features, this is
implemented in the simd package, but the difference is invisible to
the user.

Change-Id: I2985ebd361f0ecd45fd428903efe4c981a5ec65d
Reviewed-on: https://go-review.googlesource.com/c/go/+/736100
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/736200
Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
Austin Clements 2026-01-13 09:34:53 -05:00 committed by Cherry Mui
parent e2fef50def
commit 9ef1692c93
4 changed files with 205 additions and 63 deletions

View file

@ -189,6 +189,7 @@ type X86Features struct {}
var X86 X86Features
{{range .}}
{{$f := .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
@ -199,11 +200,19 @@ var X86 X86Features
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
{{- if ne .ImpliesAll ""}}
//
// If it returns true, then the CPU also supports {{.ImpliesAll}}.
{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
func (X86Features) {{.Feature}}() bool {
return cpu.X86.Has{{.Feature}}
func ({{.FeatureVar}}Features) {{.Feature}}() bool {
{{- if .Virtual}}
return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
{{- else}}
return cpu.{{.FeatureVar}}.Has{{.Feature}}
{{- end}}
}
{{end}}
`
@ -591,6 +600,65 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
return buffer
}
// goarchFeatures collects the generator's CPU feature metadata for a single
// GOARCH: the name of its feature-check variable and the per-feature
// information table.
type goarchFeatures struct {
// featureVar is the name of the exported feature-check variable for this
// architecture (for example "X86"). The features template uses it both to
// name the receiver type (<featureVar>Features) and to select the
// internal/cpu variable that is consulted (cpu.<featureVar>).
featureVar string
// features records per-feature information, keyed by feature name.
// Features without an entry are treated as having the zero featureInfo.
features map[string]featureInfo
}
// featureInfo is the generator's metadata for one CPU feature. The zero value
// describes a feature that implies nothing else and whose check is emitted as
// a direct read of the matching internal/cpu Has* field.
type featureInfo struct {
// Implies is a list of other CPU features that are required for this
// feature. These are allowed to chain.
//
// For example, if the Frob feature lists "Baz", then if X.Frob() returns
// true, it must also be true that the CPU has feature Baz.
Implies []string
// Virtual means this feature is not represented directly in internal/cpu,
// but is instead the logical AND of the features in Implies.
//
// For a virtual feature the generated check is the && of the Has* fields of
// the entries in Implies, in the order they are listed.
Virtual bool
}
// goarchFeatureInfo maps from GOARCH to CPU feature to additional information
// about that feature. Not all features need to be in this map.
//
// It is populated through registerFeatureInfo.
var goarchFeatureInfo = make(map[string]goarchFeatures)
// registerFeatureInfo records features as the feature information for goArch.
//
// Registering the same goArch again replaces the earlier entry wholesale; the
// two tables are not merged.
func registerFeatureInfo(goArch string, features goarchFeatures) {
goarchFeatureInfo[goArch] = features
}
// featureImplies returns a comment-ready English list of every CPU feature
// that is transitively implied by the feature base on the given goarch, using
// the Implies tables registered in goarchFeatureInfo.
//
// The result does not include base itself. It is "" if base implies nothing
// (or is unknown), "A" for one feature, "A and B" for two, and
// "A, B, and C" for three or more.
//
// Each implied feature is listed exactly once, even if it is reachable from
// base along several paths, and the walk terminates even if the registered
// table contains a cycle.
func featureImplies(goarch string, base string) string {
	features := goarchFeatureInfo[goarch].features

	// Compute the transitive closure of base in depth-first pre-order.
	//
	// seen starts out containing base so that base is never reported as
	// implying itself, and so that a cycle leading back to base stops there.
	seen := map[string]bool{base: true}
	var list []string
	var visit func(f string)
	visit = func(f string) {
		for _, dep := range features[f].Implies {
			if seen[dep] {
				// Already listed via another path (or a cycle); listing it
				// again would duplicate it in the generated documentation.
				continue
			}
			seen[dep] = true
			list = append(list, dep)
			visit(dep)
		}
	}
	visit(base)

	// Put in "nice" order: reversing the discovery order lists the deepest
	// (most basic) features of a chain first, e.g. "AVX, AVX2, and AVX512".
	slices.Reverse(list)

	// Combine into a comment-ready form
	switch len(list) {
	case 0:
		return ""
	case 1:
		return list[0]
	case 2:
		return list[0] + " and " + list[1]
	default:
		list[len(list)-1] = "and " + list[len(list)-1]
		return strings.Join(list, ", ")
	}
}
func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
// Gather all features
type featureKey struct {
@ -606,13 +674,36 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
featureSet[featureKey{op.GoArch, feature}] = struct{}{}
}
}
features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
return c
}
return compareNatural(a.Feature, b.Feature)
})
// TODO: internal/cpu doesn't enforce these at all. You can even do
// GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off
// AVX2. We need to push these dependencies into it somehow.
type feature struct {
featureKey
FeatureVar string
Virtual bool
Implies []string
ImpliesAll string
}
var features []feature
for _, k := range featureKeys {
featureVar := goarchFeatureInfo[k.GoArch].featureVar
fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
features = append(features, feature{
featureKey: k,
FeatureVar: featureVar,
Virtual: fi.Virtual,
Implies: fi.Implies,
ImpliesAll: featureImplies(k.GoArch, k.Feature),
})
}
// If we ever have the same feature name on more than one GOARCH, we'll have
// to be more careful about this.
t := templateOf(simdFeaturesTemplate, "features")

View file

@ -5,7 +5,6 @@
package main
import (
"cmp"
"fmt"
"log"
"maps"
@ -210,16 +209,9 @@ func loadXED(xedPath string) []*unify.Value {
}
log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
} else {
keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int {
return cmp.Or(cmp.Compare(a.Extension, b.Extension),
cmp.Compare(a.ISASet, b.ISASet))
})
keys := slices.Sorted(maps.Keys(unknownFeatures))
for _, key := range keys {
if key.ISASet == "" || key.ISASet == key.Extension {
log.Printf("unhandled Extension %s", key.Extension)
} else {
log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet)
}
log.Printf("unhandled ISASet %s", key)
log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
}
}
@ -763,16 +755,24 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant inst
// decodeCPUFeature returns the CPU feature name required by inst. These match
// the names of the "Has*" feature checks in the simd package.
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
key := cpuFeatureKey{
Extension: inst.Extension,
ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""),
isaSet := inst.ISASet
if isaSet == "" {
// Older instructions don't have an ISA set. Use their "extension"
// instead.
isaSet = inst.Extension
}
feat, ok := cpuFeatureMap[key]
// We require AVX512VL to use AVX512 at all, so strip off the vector length
// suffixes.
if strings.HasPrefix(isaSet, "AVX512") {
isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
}
feat, ok := cpuFeatureMap[isaSet]
if !ok {
imap := unknownFeatures[key]
imap := unknownFeatures[isaSet]
if imap == nil {
imap = make(map[string]struct{})
unknownFeatures[key] = imap
unknownFeatures[isaSet] = imap
}
imap[inst.Opcode()] = struct{}{}
return "", false
@ -783,45 +783,74 @@ func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
return feat, true
}
var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$")
var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
type cpuFeatureKey struct {
Extension, ISASet string
}
// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name
// that can be used in the SIMD API.
var cpuFeatureMap = map[cpuFeatureKey]string{
{"SHA", "SHA"}: "SHA",
{"AVX", ""}: "AVX",
{"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
{"AVX2", ""}: "AVX2",
{"AVXAES", ""}: "AVX, AES",
// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
// name to expose in the SIMD feature check API.
//
// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
var cpuFeatureMap = map[string]string{
"AVX": "AVX",
"AVX_VNNI": "AVXVNNI",
"AVX2": "AVX2",
"AVXAES": "AVXAES",
"SHA": "SHA",
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
{"AVX512EVEX", "AVX512F"}: "AVX512",
{"AVX512EVEX", "AVX512CD"}: "AVX512",
{"AVX512EVEX", "AVX512BW"}: "AVX512",
{"AVX512EVEX", "AVX512DQ"}: "AVX512",
// AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by
// the vector length suffix.
"AVX512F": "AVX512",
"AVX512BW": "AVX512",
"AVX512CD": "AVX512",
"AVX512DQ": "AVX512",
// AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
// required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.
// AVX-512 extension features
{"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
{"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
{"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
{"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
{"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ",
"AVX512_BITALG": "AVX512BITALG",
"AVX512_GFNI": "AVX512GFNI",
"AVX512_VBMI": "AVX512VBMI",
"AVX512_VBMI2": "AVX512VBMI2",
"AVX512_VNNI": "AVX512VNNI",
"AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ",
"AVX512_VAES": "AVX512VAES",
"AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",
// AVX 10.2 (not yet supported)
{"AVX512EVEX", "AVX10_2_RC"}: "ignore",
"AVX10_2_RC": "ignore",
}
var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{}
// init registers the amd64 CPU feature implication table, exposed through the
// "X86" feature-check variable.
func init() {
	// TODO: In general, Intel doesn't make any guarantees about what flags are
	// set, so this means our feature checks need to ensure these, just to be
	// sure.

	// requires builds the entry for a non-virtual feature that implies deps.
	requires := func(deps ...string) featureInfo {
		return featureInfo{Implies: deps}
	}

	table := map[string]featureInfo{
		"AVX2":   requires("AVX"),
		"AVX512": requires("AVX2"),

		// AVXAES has no flag of its own; it is the AND of AVX and AES.
		"AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},

		// AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
		// instructions to VEX encoding, limited to 256 bit vectors. They're
		// intended for lower end CPUs that want to support VNNI/IFMA without
		// supporting AVX-512. As such, they're built on AVX2's VEX encoding.
		"AVXVNNI": requires("AVX2"),
		"AVXIFMA": requires("AVX2"),
	}

	// AVX-512 subfeatures. Each one implies the combined "AVX512" feature.
	//
	// NOTE(review): AVX512VPCLMULQDQ appears in cpuFeatureMap but has no entry
	// here, so its generated docs will not list implied features. Confirm
	// whether that omission is intentional.
	for _, sub := range []string{
		"AVX512BITALG",
		"AVX512GFNI",
		"AVX512VBMI",
		"AVX512VBMI2",
		"AVX512VNNI",
		"AVX512VPOPCNTDQ",
		"AVX512VAES",
	} {
		table[sub] = requires("AVX512")
	}

	registerFeatureInfo("amd64", goarchFeatures{
		featureVar: "X86",
		features:   table,
	})
}
var unknownFeatures = map[string]map[string]struct{}{}
// hasOptionalMask returns whether there is an optional mask operand in ops.
func hasOptionalMask(ops []operand) bool {

View file

@ -10,14 +10,6 @@ type X86Features struct{}
var X86 X86Features
// AES returns whether the CPU supports the AES feature.
//
// AES is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AES() bool {
return cpu.X86.HasAES
}
// AVX returns whether the CPU supports the AVX feature.
//
// AVX is defined on all GOARCHes, but will only return true on
@ -28,6 +20,8 @@ func (X86Features) AVX() bool {
// AVX2 returns whether the CPU supports the AVX2 feature.
//
// If it returns true, then the CPU also supports AVX.
//
// AVX2 is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX2() bool {
@ -41,6 +35,8 @@ func (X86Features) AVX2() bool {
// Nearly every CPU that has shipped with any support for AVX-512 has
// supported all five of these features.
//
// If it returns true, then the CPU also supports AVX and AVX2.
//
// AVX512 is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512() bool {
@ -49,6 +45,8 @@ func (X86Features) AVX512() bool {
// AVX512BITALG returns whether the CPU supports the AVX512BITALG feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512BITALG is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512BITALG() bool {
@ -57,6 +55,8 @@ func (X86Features) AVX512BITALG() bool {
// AVX512GFNI returns whether the CPU supports the AVX512GFNI feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512GFNI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512GFNI() bool {
@ -65,6 +65,8 @@ func (X86Features) AVX512GFNI() bool {
// AVX512VAES returns whether the CPU supports the AVX512VAES feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512VAES is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VAES() bool {
@ -73,6 +75,8 @@ func (X86Features) AVX512VAES() bool {
// AVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512VBMI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VBMI() bool {
@ -81,6 +85,8 @@ func (X86Features) AVX512VBMI() bool {
// AVX512VBMI2 returns whether the CPU supports the AVX512VBMI2 feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512VBMI2 is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VBMI2() bool {
@ -89,6 +95,8 @@ func (X86Features) AVX512VBMI2() bool {
// AVX512VNNI returns whether the CPU supports the AVX512VNNI feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512VNNI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VNNI() bool {
@ -105,14 +113,28 @@ func (X86Features) AVX512VPCLMULQDQ() bool {
// AVX512VPOPCNTDQ returns whether the CPU supports the AVX512VPOPCNTDQ feature.
//
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
//
// AVX512VPOPCNTDQ is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VPOPCNTDQ() bool {
return cpu.X86.HasAVX512VPOPCNTDQ
}
// AVXAES returns whether the CPU supports the AVXAES feature.
//
// If it returns true, then the CPU also supports AES and AVX.
//
// AVXAES is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVXAES() bool {
return cpu.X86.HasAVX && cpu.X86.HasAES
}
// AVXVNNI returns whether the CPU supports the AVXVNNI feature.
//
// If it returns true, then the CPU also supports AVX and AVX2.
//
// AVXVNNI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVXVNNI() bool {

View file

@ -11,7 +11,7 @@ package archsimd
// y is the chunk of dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
// Asm: VAESDECLAST, CPU Feature: AVX, AES
// Asm: VAESDECLAST, CPU Feature: AVXAES
func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
@ -37,7 +37,7 @@ func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64
// y is the chunk of dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
// Asm: VAESDEC, CPU Feature: AVX, AES
// Asm: VAESDEC, CPU Feature: AVXAES
func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
@ -63,7 +63,7 @@ func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64
// y is the chunk of w array in use.
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENCLAST, CPU Feature: AVX, AES
// Asm: VAESENCLAST, CPU Feature: AVXAES
func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
@ -89,7 +89,7 @@ func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64
// y is the chunk of w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
// Asm: VAESENC, CPU Feature: AVX, AES
// Asm: VAESENC, CPU Feature: AVXAES
func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
@ -114,7 +114,7 @@ func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64
// x is the chunk of w array in use.
// result = InvMixColumns(x)
//
// Asm: VAESIMC, CPU Feature: AVX, AES
// Asm: VAESIMC, CPU Feature: AVXAES
func (x Uint32x4) AESInvMixColumns() Uint32x4
/* AESRoundKeyGenAssist */
@ -129,7 +129,7 @@ func (x Uint32x4) AESInvMixColumns() Uint32x4
//
// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
// Asm: VAESKEYGENASSIST, CPU Feature: AVXAES
func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
/* Abs */