mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd/_gen: add mem peephole with feat mismatches
This CL attempts to add peepholes for Op -> Opload where the Opload has a different CPU feature than Op. However, the new simdgen changes don't do anything because such peepholes do not exist. Change-Id: I20c3e4b43bb7414c3a309d77786218372ca1b5b8 Reviewed-on: https://go-review.googlesource.com/c/go/+/711380 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
ba72ee0f30
commit
c4fbf3b4cf
4 changed files with 50 additions and 12 deletions
|
|
@ -25,6 +25,7 @@ type tplRuleData struct {
|
|||
Size int // e.g. 128
|
||||
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
|
||||
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
|
||||
FeatCheck string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
@ -43,6 +44,8 @@ var (
|
|||
{{end}}
|
||||
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
|
||||
{{end}}
|
||||
{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l)=> ({{.Asm}}load {{.ArgsAddr}})
|
||||
{{end}}
|
||||
`))
|
||||
)
|
||||
|
||||
|
|
@ -277,7 +280,18 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
|||
memOpData.ArgsLoadAddr += " mask"
|
||||
}
|
||||
memOpData.ArgsAddr += " mem"
|
||||
memOpData.tplName = "vregMem"
|
||||
if gOp.MemFeaturesData != nil {
|
||||
_, feat2 := getVbcstData(*gOp.MemFeaturesData)
|
||||
knownFeatChecks := map[string]string{
|
||||
"AVX": "v.Block.CPUfeatures.hasFeature(CPUavx)",
|
||||
"AVX2": "v.Block.CPUfeatures.hasFeature(CPUavx2)",
|
||||
"AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
|
||||
}
|
||||
memOpData.FeatCheck = knownFeatChecks[feat2]
|
||||
memOpData.tplName = "vregMemFeatCheck"
|
||||
} else {
|
||||
memOpData.tplName = "vregMem"
|
||||
}
|
||||
memOptData = append(memOptData, memOpData)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -800,6 +800,14 @@ func reportXEDInconsistency(ops []Operation) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// getVbcstData parses a memFeaturesData payload of the form
// "feat1=<name>;feat2=<name>" and returns the two feature names.
//
// feat1Match is the text between "feat1=" and the first ';'; feat2Match is
// everything after ";feat2=". Both must be non-empty. It panics with an error
// if s does not have this shape.
//
// The parse is done by hand because fmt.Sscanf does not support C-style
// scansets: a "%[^;]" verb is rejected as a bad verb, so a Sscanf-based
// implementation would panic on every input, including well-formed ones.
func getVbcstData(s string) (feat1Match, feat2Match string) {
	const (
		feat1Key = "feat1="
		feat2Key = ";feat2="
	)
	if len(s) > len(feat1Key) && s[:len(feat1Key)] == feat1Key {
		rest := s[len(feat1Key):]
		// feat1 runs up to (not including) the first ';'.
		semi := 0
		for semi < len(rest) && rest[semi] != ';' {
			semi++
		}
		tail := rest[semi:]
		if semi > 0 && len(tail) > len(feat2Key) && tail[:len(feat2Key)] == feat2Key {
			return rest[:semi], tail[len(feat2Key):]
		}
	}
	panic(fmt.Errorf(`malformed memFeaturesData %q: want "feat1=<name>;feat2=<name>"`, s))
}
|
||||
|
||||
func (o Operation) String() string {
|
||||
return pprints(o)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,15 +52,16 @@ type rawOperation struct {
|
|||
// Should be paired with special templates in gen_simdrules.go
|
||||
SpecialLower *string
|
||||
|
||||
In []Operand // Parameters
|
||||
InVariant []Operand // Optional parameters
|
||||
Out []Operand // Results
|
||||
MemFeatures *string // The memory operand feature this operation supports
|
||||
Commutative bool // Commutativity
|
||||
CPUFeature string // CPUID/Has* feature name
|
||||
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
|
||||
Documentation *string // Documentation will be appended to the stubs comments.
|
||||
AddDoc *string // Additional doc to be appended.
|
||||
In []Operand // Parameters
|
||||
InVariant []Operand // Optional parameters
|
||||
Out []Operand // Results
|
||||
MemFeatures *string // The memory operand feature this operation supports
|
||||
MemFeaturesData *string // Additional data associated with MemFeatures
|
||||
Commutative bool // Commutativity
|
||||
CPUFeature string // CPUID/Has* feature name
|
||||
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
|
||||
Documentation *string // Documentation will be appended to the stubs comments.
|
||||
AddDoc *string // Additional doc to be appended.
|
||||
// ConstImm is a hack to reduce the size of defs the user writes for const-immediate
|
||||
// If present, it will be copied to [In[0].Const].
|
||||
ConstImm *string
|
||||
|
|
|
|||
|
|
@ -125,16 +125,20 @@ func loadXED(xedPath string) []*unify.Value {
|
|||
feat1, ok1 := decodeCPUFeature(o.inst)
|
||||
// Then check if there exists such an operation that for all vreg
|
||||
// shapes they are the same at the same index
|
||||
var feat1Match, feat2Match string
|
||||
matchIdx := -1
|
||||
var featMismatchCnt int
|
||||
outer:
|
||||
for i, m := range ms {
|
||||
// Their CPU feature should match first
|
||||
var featMismatch bool
|
||||
feat2, ok2 := decodeCPUFeature(m.inst)
|
||||
if !ok1 || !ok2 {
|
||||
continue
|
||||
}
|
||||
if feat1 != feat2 {
|
||||
continue
|
||||
featMismatch = true
|
||||
featMismatchCnt++
|
||||
}
|
||||
if len(o.ops) == len(m.ops) {
|
||||
for j := range o.ops {
|
||||
|
|
@ -160,7 +164,15 @@ func loadXED(xedPath string) []*unify.Value {
|
|||
}
|
||||
// Found a match, break early
|
||||
matchIdx = i
|
||||
break
|
||||
feat1Match = feat1
|
||||
feat2Match = feat2
|
||||
if featMismatchCnt > 1 {
|
||||
panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
|
||||
}
|
||||
if !featMismatch {
|
||||
// Mismatch feat is ok but should prioritize matching cases.
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Remove the match from memOps, it's now merged to this pure vreg operation
|
||||
|
|
@ -169,6 +181,9 @@ func loadXED(xedPath string) []*unify.Value {
|
|||
// Merge is done by adding a new field
|
||||
// Right now we only have vbcst
|
||||
addFields["memFeatures"] = "vbcst"
|
||||
if feat1Match != feat2Match {
|
||||
addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue