[dev.simd] simd/_gen: add mem peephole with feat mismatches

This CL attempts to add peepholes for Op -> Opload where the Opload has
a different CPU feature than Op. However, the new simdgen changes don't
do anything because such peepholes do not exist.

Change-Id: I20c3e4b43bb7414c3a309d77786218372ca1b5b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/711380
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-10-13 18:30:05 +00:00
parent ba72ee0f30
commit c4fbf3b4cf
4 changed files with 50 additions and 12 deletions

View file

@ -25,6 +25,7 @@ type tplRuleData struct {
Size int // e.g. 128 Size int // e.g. 128
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask. ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end. ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
FeatCheck string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
} }
var ( var (
@ -43,6 +44,8 @@ var (
{{end}} {{end}}
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}}) {{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
{{end}} {{end}}
{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l)=> ({{.Asm}}load {{.ArgsAddr}})
{{end}}
`)) `))
) )
@ -277,7 +280,18 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
memOpData.ArgsLoadAddr += " mask" memOpData.ArgsLoadAddr += " mask"
} }
memOpData.ArgsAddr += " mem" memOpData.ArgsAddr += " mem"
if gOp.MemFeaturesData != nil {
_, feat2 := getVbcstData(*gOp.MemFeaturesData)
knownFeatChecks := map[string]string{
"AVX": "v.Block.CPUfeatures.hasFeature(CPUavx)",
"AVX2": "v.Block.CPUfeatures.hasFeature(CPUavx2)",
"AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
}
memOpData.FeatCheck = knownFeatChecks[feat2]
memOpData.tplName = "vregMemFeatCheck"
} else {
memOpData.tplName = "vregMem" memOpData.tplName = "vregMem"
}
memOptData = append(memOptData, memOpData) memOptData = append(memOptData, memOpData)
} }
} }

View file

@ -800,6 +800,14 @@ func reportXEDInconsistency(ops []Operation) error {
return nil return nil
} }
// getVbcstData parses a memFeaturesData string of the form
// "feat1=<F1>;feat2=<F2>" and returns the two feature names: feat1Match is
// the text between "feat1=" and the first ';', feat2Match is everything after
// "feat2=".
//
// It panics if s does not have that form or if either feature name is empty.
func getVbcstData(s string) (feat1Match, feat2Match string) {
	// Go's fmt scanning has no C-style scanset verb ("%[^;]"); Sscanf rejects
	// it as a bad verb, so the string is split by hand instead.
	const feat1Key, feat2Key = "feat1=", "feat2="

	// Locate the first ';', which separates the two key=value pairs.
	sep := -1
	for i := 0; i < len(s); i++ {
		if s[i] == ';' {
			sep = i
			break
		}
	}
	if sep < 0 {
		panic(fmt.Errorf("malformed vbcst data %q: missing ';' separator", s))
	}

	head, tail := s[:sep], s[sep+1:]
	// Each half must carry its key and a non-empty value.
	if len(head) <= len(feat1Key) || head[:len(feat1Key)] != feat1Key ||
		len(tail) <= len(feat2Key) || tail[:len(feat2Key)] != feat2Key {
		panic(fmt.Errorf("malformed vbcst data %q: want \"feat1=<F1>;feat2=<F2>\"", s))
	}
	return head[len(feat1Key):], tail[len(feat2Key):]
}
func (o Operation) String() string { func (o Operation) String() string {
return pprints(o) return pprints(o)
} }

View file

@ -56,6 +56,7 @@ type rawOperation struct {
InVariant []Operand // Optional parameters InVariant []Operand // Optional parameters
Out []Operand // Results Out []Operand // Results
MemFeatures *string // The memory operand feature this operation supports MemFeatures *string // The memory operand feature this operation supports
MemFeaturesData *string // Additional data associated with MemFeatures
Commutative bool // Commutativity Commutative bool // Commutativity
CPUFeature string // CPUID/Has* feature name CPUFeature string // CPUID/Has* feature name
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"

View file

@ -125,16 +125,20 @@ func loadXED(xedPath string) []*unify.Value {
feat1, ok1 := decodeCPUFeature(o.inst) feat1, ok1 := decodeCPUFeature(o.inst)
// Then check if there exist such an operation that for all vreg // Then check if there exist such an operation that for all vreg
// shapes they are the same at the same index // shapes they are the same at the same index
var feat1Match, feat2Match string
matchIdx := -1 matchIdx := -1
var featMismatchCnt int
outer: outer:
for i, m := range ms { for i, m := range ms {
// Their CPU feature should match first // Their CPU feature should match first
var featMismatch bool
feat2, ok2 := decodeCPUFeature(m.inst) feat2, ok2 := decodeCPUFeature(m.inst)
if !ok1 || !ok2 { if !ok1 || !ok2 {
continue continue
} }
if feat1 != feat2 { if feat1 != feat2 {
continue featMismatch = true
featMismatchCnt++
} }
if len(o.ops) == len(m.ops) { if len(o.ops) == len(m.ops) {
for j := range o.ops { for j := range o.ops {
@ -160,15 +164,26 @@ func loadXED(xedPath string) []*unify.Value {
} }
// Found a match, break early // Found a match, break early
matchIdx = i matchIdx = i
feat1Match = feat1
feat2Match = feat2
if featMismatchCnt > 1 {
panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
}
if !featMismatch {
// Mismatch feat is ok but should prioritize matching cases.
break break
} }
} }
}
// Remove the match from memOps, it's now merged to this pure vreg operation // Remove the match from memOps, it's now merged to this pure vreg operation
if matchIdx != -1 { if matchIdx != -1 {
memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...) memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
// Merge is done by adding a new field // Merge is done by adding a new field
// Right now we only have vbcst // Right now we only have vbcst
addFields["memFeatures"] = "vbcst" addFields["memFeatures"] = "vbcst"
if feat1Match != feat2Match {
addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
}
} }
} }
} }