diff --git a/src/simd/_gen/simdgen/gen_simdrules.go b/src/simd/_gen/simdgen/gen_simdrules.go
index 2339a1910d7..059a2a4f365 100644
--- a/src/simd/_gen/simdgen/gen_simdrules.go
+++ b/src/simd/_gen/simdgen/gen_simdrules.go
@@ -25,6 +25,7 @@ type tplRuleData struct {
 	Size           int    // e.g. 128
 	ArgsLoadAddr   string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
 	ArgsAddr       string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
+	FeatCheck      string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
 }
 
 var (
@@ -43,6 +44,8 @@ var (
 {{end}}
 {{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
 {{end}}
+{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
+{{end}}
 `))
 )
 
@@ -277,7 +280,25 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
 						memOpData.ArgsLoadAddr += " mask"
 					}
 					memOpData.ArgsAddr += " mem"
-					memOpData.tplName = "vregMem"
+					if gOp.MemFeaturesData != nil {
+						// MemFeaturesData is only recorded when the memory form's CPU feature
+						// differs from the register form's, so the rule needs a feature guard.
+						_, feat2 := getVbcstData(*gOp.MemFeaturesData)
+						knownFeatChecks := map[string]string{
+							"AVX":    "v.Block.CPUfeatures.hasFeature(CPUavx)",
+							"AVX2":   "v.Block.CPUfeatures.hasFeature(CPUavx2)",
+							"AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
+						}
+						featCheck, ok := knownFeatChecks[feat2]
+						if !ok {
+							// An empty check would render as "&&  &&" in the generated rule.
+							panic("simdgen: no known CPU feature check for memop feature " + feat2)
+						}
+						memOpData.FeatCheck = featCheck
+						memOpData.tplName = "vregMemFeatCheck"
+					} else {
+						memOpData.tplName = "vregMem"
+					}
 					memOptData = append(memOptData, memOpData)
 				}
 			}
diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/_gen/simdgen/gen_utility.go
index 78a214783b1..70f07cf7a49 100644
--- a/src/simd/_gen/simdgen/gen_utility.go
+++ b/src/simd/_gen/simdgen/gen_utility.go
@@ -800,6 +800,30 @@ func reportXEDInconsistency(ops []Operation) error {
 	return nil
 }
 
+// getVbcstData decodes a string of the form "feat1=<feature>;feat2=<feature>",
+// the layout loadXED builds with fmt.Sprintf for the "memFeaturesData" field,
+// and returns the two feature names in that order.
+// It panics if s has any other layout or if either feature name is empty.
+func getVbcstData(s string) (feat1Match, feat2Match string) {
+	// fmt.Sscanf has no C-style scan sets ("%[^;]") and its %s only stops at
+	// white space, so the two fields are split by hand.
+	const prefix, sep = "feat1=", ";feat2="
+	if len(s) > len(prefix) && s[:len(prefix)] == prefix {
+		rest := s[len(prefix):]
+		for i := 0; i+len(sep) <= len(rest); i++ {
+			if rest[i:i+len(sep)] != sep {
+				continue
+			}
+			feat1Match, feat2Match = rest[:i], rest[i+len(sep):]
+			if feat1Match != "" && feat2Match != "" {
+				return feat1Match, feat2Match
+			}
+			break
+		}
+	}
+	panic(fmt.Errorf("malformed memFeaturesData %q, want \"feat1=<feature>;feat2=<feature>\"", s))
+}
+
 func (o Operation) String() string {
 	return pprints(o)
 }
diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go
index f9a2caaca30..bda1dfc8fec 100644
--- a/src/simd/_gen/simdgen/godefs.go
+++ b/src/simd/_gen/simdgen/godefs.go
@@ -52,15 +52,16 @@ type rawOperation struct {
 	// Should be paired with special templates in gen_simdrules.go
 	SpecialLower *string
 
-	In            []Operand // Parameters
-	InVariant     []Operand // Optional parameters
-	Out           []Operand // Results
-	MemFeatures   *string   // The memory operand feature this operation supports
-	Commutative   bool      // Commutativity
-	CPUFeature    string    // CPUID/Has* feature name
-	Zeroing       *bool     // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
-	Documentation *string   // Documentation will be appended to the stubs comments.
-	AddDoc        *string   // Additional doc to be appended.
+	In              []Operand // Parameters
+	InVariant       []Operand // Optional parameters
+	Out             []Operand // Results
+	MemFeatures     *string   // The memory operand feature this operation supports
+	MemFeaturesData *string   // Additional data associated with MemFeatures
+	Commutative     bool      // Commutativity
+	CPUFeature      string    // CPUID/Has* feature name
+	Zeroing         *bool     // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
+	Documentation   *string   // Documentation will be appended to the stubs comments.
+	AddDoc          *string   // Additional doc to be appended.
 	// ConstMask is a hack to reduce the size of defs the user writes for const-immediate
 	// If present, it will be copied to [In[0].Const].
 	ConstImm *string
diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/_gen/simdgen/xed.go
index 1781f5c74d0..c3eb4780be0 100644
--- a/src/simd/_gen/simdgen/xed.go
+++ b/src/simd/_gen/simdgen/xed.go
@@ -125,16 +125,20 @@ func loadXED(xedPath string) []*unify.Value {
 			feat1, ok1 := decodeCPUFeature(o.inst)
 			// Then check if there exist such an operation that for all vreg
 			// shapes they are the same at the same index
+			var feat1Match, feat2Match string
 			matchIdx := -1
+			var featMismatchCnt int
 		outer:
 			for i, m := range ms {
 				// Their CPU feature should match first
+				var featMismatch bool
 				feat2, ok2 := decodeCPUFeature(m.inst)
 				if !ok1 || !ok2 {
 					continue
 				}
 				if feat1 != feat2 {
-					continue
+					featMismatch = true
+					featMismatchCnt++
 				}
 				if len(o.ops) == len(m.ops) {
 					for j := range o.ops {
@@ -160,7 +164,15 @@ func loadXED(xedPath string) []*unify.Value {
 					}
 					// Found a match, break early
 					matchIdx = i
-					break
+					feat1Match = feat1
+					feat2Match = feat2
+					if featMismatchCnt > 1 {
+						panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
+					}
+					if !featMismatch {
+						// Mismatch feat is ok but should prioritize matching cases.
+						break
+					}
 				}
 			}
 			// Remove the match from memOps, it's now merged to this pure vreg operation
@@ -169,6 +181,9 @@ func loadXED(xedPath string) []*unify.Value {
 				// Merge is done by adding a new field
 				// Right now we only have vbcst
 				addFields["memFeatures"] = "vbcst"
+				if feat1Match != feat2Match {
+					addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
+				}
 			}
 		}
 	}