mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd/_gen: add mem peephole with feat mismatches
This CL attempts to add peepholes for Op -> Opload where the Opload has a different CPU feature than Op. However, the new simdgen changes don't do anything because such peepholes do not exist. Change-Id: I20c3e4b43bb7414c3a309d77786218372ca1b5b8 Reviewed-on: https://go-review.googlesource.com/c/go/+/711380 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
ba72ee0f30
commit
c4fbf3b4cf
4 changed files with 50 additions and 12 deletions
|
|
@ -25,6 +25,7 @@ type tplRuleData struct {
|
||||||
Size int // e.g. 128
|
Size int // e.g. 128
|
||||||
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
|
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
|
||||||
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
|
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
|
||||||
|
FeatCheck string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
@ -43,6 +44,8 @@ var (
|
||||||
{{end}}
|
{{end}}
|
||||||
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
|
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
|
||||||
{{end}}
|
{{end}}
|
||||||
|
{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l)=> ({{.Asm}}load {{.ArgsAddr}})
|
||||||
|
{{end}}
|
||||||
`))
|
`))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -277,7 +280,18 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
||||||
memOpData.ArgsLoadAddr += " mask"
|
memOpData.ArgsLoadAddr += " mask"
|
||||||
}
|
}
|
||||||
memOpData.ArgsAddr += " mem"
|
memOpData.ArgsAddr += " mem"
|
||||||
|
if gOp.MemFeaturesData != nil {
|
||||||
|
_, feat2 := getVbcstData(*gOp.MemFeaturesData)
|
||||||
|
knownFeatChecks := map[string]string{
|
||||||
|
"AVX": "v.Block.CPUfeatures.hasFeature(CPUavx)",
|
||||||
|
"AVX2": "v.Block.CPUfeatures.hasFeature(CPUavx2)",
|
||||||
|
"AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
|
||||||
|
}
|
||||||
|
memOpData.FeatCheck = knownFeatChecks[feat2]
|
||||||
|
memOpData.tplName = "vregMemFeatCheck"
|
||||||
|
} else {
|
||||||
memOpData.tplName = "vregMem"
|
memOpData.tplName = "vregMem"
|
||||||
|
}
|
||||||
memOptData = append(memOptData, memOpData)
|
memOptData = append(memOptData, memOpData)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -800,6 +800,14 @@ func reportXEDInconsistency(ops []Operation) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getVbcstData parses a memFeaturesData string of the form
// "feat1=<A>;feat2=<B>" and returns <A> and <B>.
//
// The string is split by hand rather than with fmt.Sscanf: Go's scanning
// verbs do not include C-style scansets such as %[^;], so a format using one
// always fails with a "bad verb" error, even on well-formed input.
//
// It panics if s does not have the expected form or if either feature name
// is empty.
func getVbcstData(s string) (feat1Match, feat2Match string) {
	const (
		feat1Key = "feat1="
		feat2Key = ";feat2="
	)
	malformed := func() {
		panic(fmt.Errorf("malformed memFeaturesData %q, want \"feat1=<feature>;feat2=<feature>\"", s))
	}

	if len(s) < len(feat1Key) || s[:len(feat1Key)] != feat1Key {
		malformed()
	}
	rest := s[len(feat1Key):]

	// feat1 runs up to the first ';', which must begin the feat2 key.
	sep := 0
	for sep < len(rest) && rest[sep] != ';' {
		sep++
	}
	// Reject an empty feat1, a missing or wrong feat2 key, and an empty feat2.
	if sep == 0 || len(rest)-sep <= len(feat2Key) || rest[sep:sep+len(feat2Key)] != feat2Key {
		malformed()
	}
	return rest[:sep], rest[sep+len(feat2Key):]
}
|
||||||
|
|
||||||
func (o Operation) String() string {
|
func (o Operation) String() string {
|
||||||
return pprints(o)
|
return pprints(o)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,7 @@ type rawOperation struct {
|
||||||
InVariant []Operand // Optional parameters
|
InVariant []Operand // Optional parameters
|
||||||
Out []Operand // Results
|
Out []Operand // Results
|
||||||
MemFeatures *string // The memory operand feature this operation supports
|
MemFeatures *string // The memory operand feature this operation supports
|
||||||
|
MemFeaturesData *string // Additional data associated with MemFeatures
|
||||||
Commutative bool // Commutativity
|
Commutative bool // Commutativity
|
||||||
CPUFeature string // CPUID/Has* feature name
|
CPUFeature string // CPUID/Has* feature name
|
||||||
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
|
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
|
||||||
|
|
|
||||||
|
|
@ -125,16 +125,20 @@ func loadXED(xedPath string) []*unify.Value {
|
||||||
feat1, ok1 := decodeCPUFeature(o.inst)
|
feat1, ok1 := decodeCPUFeature(o.inst)
|
||||||
// Then check if there exist such an operation that for all vreg
|
// Then check if there exist such an operation that for all vreg
|
||||||
// shapes they are the same at the same index
|
// shapes they are the same at the same index
|
||||||
|
var feat1Match, feat2Match string
|
||||||
matchIdx := -1
|
matchIdx := -1
|
||||||
|
var featMismatchCnt int
|
||||||
outer:
|
outer:
|
||||||
for i, m := range ms {
|
for i, m := range ms {
|
||||||
// Their CPU feature should match first
|
// Their CPU feature should match first
|
||||||
|
var featMismatch bool
|
||||||
feat2, ok2 := decodeCPUFeature(m.inst)
|
feat2, ok2 := decodeCPUFeature(m.inst)
|
||||||
if !ok1 || !ok2 {
|
if !ok1 || !ok2 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if feat1 != feat2 {
|
if feat1 != feat2 {
|
||||||
continue
|
featMismatch = true
|
||||||
|
featMismatchCnt++
|
||||||
}
|
}
|
||||||
if len(o.ops) == len(m.ops) {
|
if len(o.ops) == len(m.ops) {
|
||||||
for j := range o.ops {
|
for j := range o.ops {
|
||||||
|
|
@ -160,15 +164,26 @@ func loadXED(xedPath string) []*unify.Value {
|
||||||
}
|
}
|
||||||
// Found a match, break early
|
// Found a match, break early
|
||||||
matchIdx = i
|
matchIdx = i
|
||||||
|
feat1Match = feat1
|
||||||
|
feat2Match = feat2
|
||||||
|
if featMismatchCnt > 1 {
|
||||||
|
panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
|
||||||
|
}
|
||||||
|
if !featMismatch {
|
||||||
|
// Mismatch feat is ok but should prioritize matching cases.
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// Remove the match from memOps, it's now merged to this pure vreg operation
|
// Remove the match from memOps, it's now merged to this pure vreg operation
|
||||||
if matchIdx != -1 {
|
if matchIdx != -1 {
|
||||||
memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
|
memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
|
||||||
// Merge is done by adding a new field
|
// Merge is done by adding a new field
|
||||||
// Right now we only have vbcst
|
// Right now we only have vbcst
|
||||||
addFields["memFeatures"] = "vbcst"
|
addFields["memFeatures"] = "vbcst"
|
||||||
|
if feat1Match != feat2Match {
|
||||||
|
addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue