[dev.simd] simd/_gen/simdgen: merge memory ops

This CL merges pure vreg ops with their memory variants (full vec and
broadcasting).

No changes to the generated code.

Change-Id: I362994c2620939d25c766abe0eff8f3db7f289ea
Reviewed-on: https://go-review.googlesource.com/c/go/+/700756
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-09-03 20:58:49 +00:00
parent f42c9261d3
commit 0b323350a5
2 changed files with 94 additions and 16 deletions

View file

@ -55,6 +55,7 @@ type rawOperation struct {
In []Operand // Parameters In []Operand // Parameters
InVariant []Operand // Optional parameters InVariant []Operand // Optional parameters
Out []Operand // Results Out []Operand // Results
Mem string // Shape of memory operands
Commutative bool // Commutativity Commutative bool // Commutativity
CPUFeature string // CPUID/Has* feature name CPUFeature string // CPUID/Has* feature name
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"

View file

@ -50,6 +50,27 @@ func loadXED(xedPath string) []*unify.Value {
} }
var defs []*unify.Value var defs []*unify.Value
// opData bundles one XED instruction with the data needed to emit its
// definitions later, so that emission can be deferred until all
// instructions have been walked.
type opData struct {
// inst is the XED instruction this entry was created for.
inst *xeddata.Inst
// ops are the operands recorded together with inst.
ops []operand
// mem is the memory-operand shape reported by checkMem(ops),
// e.g. "noMem" or "vbcst".
mem string
}
// Both maps are keyed by opcode and hold every opData seen for that opcode.
// memOps collects the "vbcst" ops; otherOps collects everything else.
memOps := map[string][]opData{}
otherOps := map[string][]opData{}
// appendDefs applies the quirks to inst/ops, converts the result with
// instToUVal (forwarding addFields unchanged) and appends the produced values
// to defs. When the debug-XED flag is set, each newly appended value is also
// printed as YAML.
appendDefs := func(inst *xeddata.Inst, ops []operand, addFields map[string]string) {
	applyQuirks(inst, ops)

	firstNew := len(defs)
	defs = append(defs, instToUVal(inst, ops, addFields)...)

	if !*flagDebugXED {
		return
	}
	for _, def := range defs[firstNew:] {
		// The marshal error is discarded; this output is only produced
		// under the debug flag.
		dump, _ := yaml.Marshal(def)
		fmt.Printf("==>\n%s\n", dump)
	}
}
err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) { err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
inst.Pattern = xeddata.ExpandStates(db, inst.Pattern) inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)
@ -73,19 +94,72 @@ func loadXED(xedPath string) []*unify.Value {
} }
return return
} }
var data map[string][]opData
applyQuirks(inst, ops) mem := checkMem(ops)
if mem == "vbcst" {
defsPos := len(defs) // A pure vreg variant might exist, wait for later to see if we can
defs = append(defs, instToUVal(inst, ops)...) // merge them
data = memOps
if *flagDebugXED { } else {
for i := defsPos; i < len(defs); i++ { data = otherOps
y, _ := yaml.Marshal(defs[i])
fmt.Printf("==>\n%s\n", y)
} }
opcode := inst.Opcode()
if _, ok := data[opcode]; !ok {
s := make([]opData, 1)
s[0] = opData{inst, ops, mem}
data[opcode] = s
} else {
data[opcode] = append(data[opcode], opData{inst, ops, mem})
} }
}) })
// Emit every op collected in otherOps. A pure-vreg op ("noMem") that has a
// compatible broadcasting ("vbcst") counterpart in memOps absorbs it: the
// counterpart is removed from memOps and the emitted definition is tagged
// with memFeatures=vbcst. Ops without a compatible counterpart are emitted
// untagged.
for _, s := range otherOps {
	for _, o := range s {
		addFields := map[string]string{}
		if o.mem == "noMem" {
			opcode := o.inst.Opcode()
			// Check whether a vbcst variant of this operation exists.
			// First check the opcode.
			// Keep this logic in sync with [decodeOperands].
			if ms, ok := memOps[opcode]; ok {
				// Then look for a candidate with the same number of operands
				// whose vreg shapes agree at every index where both sides
				// have a vreg operand.
				matchIdx := -1
			outer:
				for i, m := range ms {
					if len(o.ops) != len(m.ops) {
						continue
					}
					for j := range o.ops {
						v1, ok1 := o.ops[j].(operandVReg)
						v2, ok2 := m.ops[j].(operandVReg)
						if ok1 && ok2 && v1.vecShape != v2.vecShape {
							// A mismatch, skip this memOp.
							continue outer
						}
					}
					// Found a match, stop searching.
					matchIdx = i
					break
				}
				if matchIdx != -1 {
					// Remove the match from memOps; it is now merged into
					// this pure vreg operation.
					memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
					// The merge is recorded by adding a new field. Right now
					// vbcst is the only memory feature. Only tag the op when
					// a merge actually happened: the opcode key stays in
					// memOps even after all its candidates were consumed, so
					// key presence alone does not imply a match.
					addFields["memFeatures"] = "vbcst"
				}
			}
		}
		appendDefs(o.inst, o.ops, addFields)
	}
}
// Anything still left in memOps was not merged into a pure-vreg op:
// log it and emit it on its own, without extra fields.
for _, leftovers := range memOps {
	for i := range leftovers {
		op := leftovers[i]
		log.Printf("mem op not merged: %s, %v\n", op.inst.Opcode(), op)
		appendDefs(op.inst, op.ops, nil)
	}
}
if err != nil { if err != nil {
log.Fatalf("walk insts: %v", err) log.Fatalf("walk insts: %v", err)
} }
@ -561,7 +635,7 @@ func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVaria
instDB.Add("mem", unify.NewValue(unify.NewStringExact(checkMem(ops)))) instDB.Add("mem", unify.NewValue(unify.NewStringExact(checkMem(ops))))
} }
// checkMem checks the shapes of memory operand in the instruction and returns the shape. // checkMem checks the shapes of memory operand in the operation and returns the shape.
// Keep this function in sync with [decodeOperand]. // Keep this function in sync with [decodeOperand].
func checkMem(ops []operand) string { func checkMem(ops []operand) string {
memState := "noMem" memState := "noMem"
@ -589,26 +663,29 @@ func checkMem(ops []operand) string {
return memState return memState
} }
func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { func instToUVal(inst *xeddata.Inst, ops []operand, addFields map[string]string) []*unify.Value {
feature, ok := decodeCPUFeature(inst) feature, ok := decodeCPUFeature(inst)
if !ok { if !ok {
return nil return nil
} }
var vals []*unify.Value var vals []*unify.Value
vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone)) vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone, addFields))
if hasOptionalMask(ops) { if hasOptionalMask(ops) {
vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked)) vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked, addFields))
} }
return vals return vals
} }
func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant) *unify.Value { func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant, addFields map[string]string) *unify.Value {
var db unify.DefBuilder var db unify.DefBuilder
db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64")))
db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode())))
addOperandsToDef(ops, &db, variant) addOperandsToDef(ops, &db, variant)
db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature))) db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature)))
for k, v := range addFields {
db.Add(k, unify.NewValue(unify.NewStringExact(v)))
}
if strings.Contains(inst.Pattern, "ZEROING=0") { if strings.Contains(inst.Pattern, "ZEROING=0") {
// This is an EVEX instruction, but the ".Z" (zero-merging) // This is an EVEX instruction, but the ".Z" (zero-merging)