[dev.simd] simd/_gen: add mem peephole with feat mismatches

This CL attempts to add peepholes for Op -> Opload where the Opload has
a different CPU feature than Op. However, the new simdgen changes don't
do anything yet because no such peepholes exist.

Change-Id: I20c3e4b43bb7414c3a309d77786218372ca1b5b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/711380
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-10-13 18:30:05 +00:00
parent ba72ee0f30
commit c4fbf3b4cf
4 changed files with 50 additions and 12 deletions

View file

@ -25,6 +25,7 @@ type tplRuleData struct {
Size int // e.g. 128
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
FeatCheck string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
}
var (
@ -43,6 +44,8 @@ var (
{{end}}
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
{{end}}
{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l)=> ({{.Asm}}load {{.ArgsAddr}})
{{end}}
`))
)
@ -277,7 +280,18 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
memOpData.ArgsLoadAddr += " mask"
}
memOpData.ArgsAddr += " mem"
memOpData.tplName = "vregMem"
if gOp.MemFeaturesData != nil {
_, feat2 := getVbcstData(*gOp.MemFeaturesData)
knownFeatChecks := map[string]string{
"AVX": "v.Block.CPUfeatures.hasFeature(CPUavx)",
"AVX2": "v.Block.CPUfeatures.hasFeature(CPUavx2)",
"AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
}
memOpData.FeatCheck = knownFeatChecks[feat2]
memOpData.tplName = "vregMemFeatCheck"
} else {
memOpData.tplName = "vregMem"
}
memOptData = append(memOptData, memOpData)
}
}

View file

@ -800,6 +800,14 @@ func reportXEDInconsistency(ops []Operation) error {
return nil
}
// getVbcstData parses a string of the form "feat1=<A>;feat2=<B>" and returns
// A as feat1Match and B as feat2Match. feat2Match is everything that follows
// the ";feat2=" separator.
//
// The string is split by hand rather than with fmt.Sscanf: Go's fmt scanning
// has no C-style scanset verb, so a format such as "feat1=%[^;];feat2=%s"
// fails with a "bad verb" error on every input.
//
// It panics if s is missing the "feat1=" prefix or the ";feat2=" separator,
// or if either feature name is empty.
func getVbcstData(s string) (feat1Match, feat2Match string) {
	const (
		prefix = "feat1="
		sep    = ";feat2="
	)
	if len(s) < len(prefix) || s[:len(prefix)] != prefix {
		panic(fmt.Errorf("malformed vbcst data %q: missing %q prefix", s, prefix))
	}
	rest := s[len(prefix):]
	// Locate the first occurrence of the separator; feat1 is what precedes it.
	for i := 0; i+len(sep) <= len(rest); i++ {
		if rest[i:i+len(sep)] != sep {
			continue
		}
		feat1Match, feat2Match = rest[:i], rest[i+len(sep):]
		if feat1Match == "" || feat2Match == "" {
			panic(fmt.Errorf("malformed vbcst data %q: empty feature name", s))
		}
		return feat1Match, feat2Match
	}
	panic(fmt.Errorf("malformed vbcst data %q: missing %q separator", s, sep))
}
// String implements fmt.Stringer by returning the result of pprints(o).
func (o Operation) String() string {
	text := pprints(o)
	return text
}

View file

@ -52,15 +52,16 @@ type rawOperation struct {
// Should be paired with special templates in gen_simdrules.go
SpecialLower *string
In []Operand // Parameters
InVariant []Operand // Optional parameters
Out []Operand // Results
MemFeatures *string // The memory operand feature this operation supports
Commutative bool // Commutativity
CPUFeature string // CPUID/Has* feature name
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
Documentation *string // Documentation will be appended to the stubs comments.
AddDoc *string // Additional doc to be appended.
In []Operand // Parameters
InVariant []Operand // Optional parameters
Out []Operand // Results
MemFeatures *string // The memory operand feature this operation supports
MemFeaturesData *string // Additional data associated with MemFeatures
Commutative bool // Commutativity
CPUFeature string // CPUID/Has* feature name
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
Documentation *string // Documentation will be appended to the stubs comments.
AddDoc *string // Additional doc to be appended.
// ConstMask is a hack to reduce the size of defs the user writes for const-immediate
// If present, it will be copied to [In[0].Const].
ConstImm *string

View file

@ -125,16 +125,20 @@ func loadXED(xedPath string) []*unify.Value {
feat1, ok1 := decodeCPUFeature(o.inst)
// Then check if there exist such an operation that for all vreg
// shapes they are the same at the same index
var feat1Match, feat2Match string
matchIdx := -1
var featMismatchCnt int
outer:
for i, m := range ms {
// Their CPU feature should match first
var featMismatch bool
feat2, ok2 := decodeCPUFeature(m.inst)
if !ok1 || !ok2 {
continue
}
if feat1 != feat2 {
continue
featMismatch = true
featMismatchCnt++
}
if len(o.ops) == len(m.ops) {
for j := range o.ops {
@ -160,7 +164,15 @@ func loadXED(xedPath string) []*unify.Value {
}
// Found a match, break early
matchIdx = i
break
feat1Match = feat1
feat2Match = feat2
if featMismatchCnt > 1 {
panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
}
if !featMismatch {
// Mismatch feat is ok but should prioritize matching cases.
break
}
}
}
// Remove the match from memOps, it's now merged to this pure vreg operation
@ -169,6 +181,9 @@ func loadXED(xedPath string) []*unify.Value {
// Merge is done by adding a new field
// Right now we only have vbcst
addFields["memFeatures"] = "vbcst"
if feat1Match != feat2Match {
addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
}
}
}
}