cmd/compile: reimplement location list generation

Completely redesign and reimplement location list generation to be more efficient, and hopefully not too hard to understand. RegKills are gone. Instead of using the regalloc's liveness calculations, redo them using the Ops' clobber information. Besides saving a lot of Values, this avoids adding RegKills to blocks that would be empty otherwise, which was messing up optimizations. This does mean that it's much harder to tell whether the generation process is buggy (there's nothing to cross-check it with), and there may be disagreements with GC liveness. But the performance gain is significant, and it's nice not to be messing with earlier compiler phases. The intermediate representations are gone. Instead of producing ssa.BlockDebugs, then dwarf.LocationLists, and then finally real location lists, go directly from the SSA to a (mostly) real location list. Because the SSA analysis happens before assembly, it stores encoded block/value IDs where PCs would normally go. It would be easier to do the SSA analysis after assembly, but I didn't want to retain the SSA just for that. Generation proceeds in two phases: first, it traverses the function in CFG order, storing the state of the block at the beginning and end. End states are used to produce the start states of the successor blocks. In the second phase, it traverses in program text order and produces the location lists. The processing in the second phase is redundant, but much cheaper than storing the intermediate representation. It might be possible to combine the two phases somewhat to take advantage of cases where the CFG matches the block layout, but I haven't tried. Location lists are finalized by adding a base address selection entry, translating each encoded block/value ID to a real PC, and adding the terminating zero entry. This probably won't work on OSX, where dsymutil will choke on the base address selection. I tried emitting CU-relative relocations for each address, and it was *very* bad for performance -- it uses more memory storing all the relocations than it does for the actual location list bytes. I think I'm going to end up synthesizing the relocations in the linker only on OSX, but TBD. TestNexting needs updating: with more optimizations working, the debugger doesn't stop on the continue (line 88) any more, and the test's duplicate suppression kicks in. Also, dx and dy live a little longer now, but they have the correct values. Change-Id: Ie772dfe23a4e389ca573624fac4d05401ae32307 Reviewed-on: https://go-review.googlesource.com/89356 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2025-12-08 06:10:04 +00:00 · 2017-10-26 15:40:17 -04:00 · 2017-10-26 15:40:17 -04:00 · 2075a9323d
commit 2075a9323d
parent 7d7af6106f
11 changed files with 712 additions and 834 deletions
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@ -13,7 +13,6 @@ import (
 	"cmd/internal/src"
 	"cmd/internal/sys"
 	"fmt"
-	"math"
 	"math/rand"
 	"sort"
 	"strings"
@ -304,8 +303,6 @@ func compileFunctions() {

 func debuginfo(fnsym *obj.LSym, curfn interface{}) ([]dwarf.Scope, dwarf.InlCalls) {
 	fn := curfn.(*Node)
-	debugInfo := fn.Func.DebugInfo
-	fn.Func.DebugInfo = nil
 	if fn.Func.Nname != nil {
 		if expect := fn.Func.Nname.Sym.Linksym(); fnsym != expect {
 			Fatalf("unexpected fnsym: %v != %v", fnsym, expect)
@ -344,7 +341,7 @@ func debuginfo(fnsym *obj.LSym, curfn interface{}) ([]dwarf.Scope, dwarf.InlCall
 		})
 	}

-	decls, dwarfVars := createDwarfVars(fnsym, debugInfo, automDecls)
+	decls, dwarfVars := createDwarfVars(fnsym, fn.Func, automDecls)

 	var varScopes []ScopeID
 	for _, decl := range decls {
@ -437,65 +434,24 @@ func createSimpleVars(automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool
 	return decls, vars, selected
 }

-type varPart struct {
-	varOffset int64
-	slot      ssa.SlotID
-}
-
-func createComplexVars(fnsym *obj.LSym, debugInfo *ssa.FuncDebug, automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool) {
-	for _, blockDebug := range debugInfo.Blocks {
-		for _, locList := range blockDebug.Variables {
-			for _, loc := range locList.Locations {
-				if loc.StartProg != nil {
-					loc.StartPC = loc.StartProg.Pc
-				}
-				if loc.EndProg != nil {
-					loc.EndPC = loc.EndProg.Pc
-				} else {
-					loc.EndPC = fnsym.Size
-				}
-				if Debug_locationlist == 0 {
-					loc.EndProg = nil
-					loc.StartProg = nil
-				}
-			}
-		}
-	}
-
-	// Group SSA variables by the user variable they were decomposed from.
-	varParts := map[*Node][]varPart{}
-	ssaVars := make(map[*Node]bool)
-	for slotID, slot := range debugInfo.VarSlots {
-		for slot.SplitOf != nil {
-			slot = slot.SplitOf
-		}
-		n := slot.N.(*Node)
-		ssaVars[n] = true
-		varParts[n] = append(varParts[n], varPart{varOffset(slot), ssa.SlotID(slotID)})
-	}
+// createComplexVars creates recomposed DWARF vars with location lists,
+// suitable for describing optimized code.
+func createComplexVars(fnsym *obj.LSym, fn *Func, automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool) {
+	debugInfo := fn.DebugInfo

 	// Produce a DWARF variable entry for each user variable.
-	// Don't iterate over the map -- that's nondeterministic, and
-	// createComplexVar has side effects. Instead, go by slot.
 	var decls []*Node
 	var vars []*dwarf.Var
-	for _, slot := range debugInfo.VarSlots {
-		for slot.SplitOf != nil {
-			slot = slot.SplitOf
-		}
-		n := slot.N.(*Node)
-		parts := varParts[n]
-		if parts == nil {
-			continue
-		}
-		// Don't work on this variable again, no matter how many slots it has.
-		delete(varParts, n)
+	ssaVars := make(map[*Node]bool)

-		// Get the order the parts need to be in to represent the memory
-		// of the decomposed user variable.
-		sort.Sort(partsByVarOffset(parts))
+	for varID := range debugInfo.Vars {
+		n := debugInfo.Vars[varID].(*Node)
+		ssaVars[n] = true
+		for _, slot := range debugInfo.VarSlots[varID] {
+			ssaVars[debugInfo.Slots[slot].N.(*Node)] = true
+		}

-		if dvar := createComplexVar(debugInfo, n, parts); dvar != nil {
+		if dvar := createComplexVar(fn, ssa.VarID(varID)); dvar != nil {
 			decls = append(decls, n)
 			vars = append(vars, dvar)
 		}
@ -504,13 +460,15 @@ func createComplexVars(fnsym *obj.LSym, debugInfo *ssa.FuncDebug, automDecls []*
 	return decls, vars, ssaVars
 }

-func createDwarfVars(fnsym *obj.LSym, debugInfo *ssa.FuncDebug, automDecls []*Node) ([]*Node, []*dwarf.Var) {
+// createDwarfVars process fn, returning a list of DWARF variables and the
+// Nodes they represent.
+func createDwarfVars(fnsym *obj.LSym, fn *Func, automDecls []*Node) ([]*Node, []*dwarf.Var) {
 	// Collect a raw list of DWARF vars.
 	var vars []*dwarf.Var
 	var decls []*Node
 	var selected map[*Node]bool
-	if Ctxt.Flag_locationlists && Ctxt.Flag_optimize && debugInfo != nil {
-		decls, vars, selected = createComplexVars(fnsym, debugInfo, automDecls)
+	if Ctxt.Flag_locationlists && Ctxt.Flag_optimize && fn.DebugInfo != nil {
+		decls, vars, selected = createComplexVars(fnsym, fn, automDecls)
 	} else {
 		decls, vars, selected = createSimpleVars(automDecls)
 	}
@ -635,22 +593,6 @@ func (s byNodeName) Len() int           { return len(s) }
 func (s byNodeName) Less(i, j int) bool { return cmpNodeName(s[i], s[j]) }
 func (s byNodeName) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

-// varOffset returns the offset of slot within the user variable it was
-// decomposed from. This has nothing to do with its stack offset.
-func varOffset(slot *ssa.LocalSlot) int64 {
-	offset := slot.Off
-	for ; slot.SplitOf != nil; slot = slot.SplitOf {
-		offset += slot.SplitOffset
-	}
-	return offset
-}
-
-type partsByVarOffset []varPart
-
-func (a partsByVarOffset) Len() int           { return len(a) }
-func (a partsByVarOffset) Less(i, j int) bool { return a[i].varOffset < a[j].varOffset }
-func (a partsByVarOffset) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
-
 // stackOffset returns the stack location of a LocalSlot relative to the
 // stack pointer, suitable for use in a DWARF location entry. This has nothing
 // to do with its offset in the user variable.
@ -671,24 +613,17 @@ func stackOffset(slot *ssa.LocalSlot) int32 {
 	return int32(base + n.Xoffset + slot.Off)
 }

-// createComplexVar builds a DWARF variable entry and location list representing n.
-func createComplexVar(debugInfo *ssa.FuncDebug, n *Node, parts []varPart) *dwarf.Var {
-	slots := debugInfo.Slots
-	var offs int64 // base stack offset for this kind of variable
+// createComplexVar builds a single DWARF variable entry and location list.
+func createComplexVar(fn *Func, varID ssa.VarID) *dwarf.Var {
+	debug := fn.DebugInfo
+	n := debug.Vars[varID].(*Node)
+
 	var abbrev int
 	switch n.Class() {
 	case PAUTO:
 		abbrev = dwarf.DW_ABRV_AUTO_LOCLIST
-		if Ctxt.FixedFrameSize() == 0 {
-			offs -= int64(Widthptr)
-		}
-		if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
-			offs -= int64(Widthptr)
-		}
-
 	case PPARAM, PPARAMOUT:
 		abbrev = dwarf.DW_ABRV_PARAM_LOCLIST
-		offs += Ctxt.FixedFrameSize()
 	default:
 		return nil
 	}
@ -712,196 +647,20 @@ func createComplexVar(debugInfo *ssa.FuncDebug, n *Node, parts []varPart) *dwarf
 		Abbrev:        abbrev,
 		Type:          Ctxt.Lookup(typename),
 		// The stack offset is used as a sorting key, so for decomposed
-		// variables just give it the lowest one. It's not used otherwise.
+		// variables just give it the first one. It's not used otherwise.
 		// This won't work well if the first slot hasn't been assigned a stack
 		// location, but it's not obvious how to do better.
-		StackOffset: int32(stackOffset(slots[parts[0].slot])),
+		StackOffset: stackOffset(debug.Slots[debug.VarSlots[varID][0]]),
 		DeclFile:    declpos.Base().SymFilename(),
 		DeclLine:    declpos.Line(),
 		DeclCol:     declpos.Col(),
 		InlIndex:    int32(inlIndex),
 		ChildIndex:  -1,
 	}
-
-	if Debug_locationlist != 0 {
-		Ctxt.Logf("Building location list for %+v. Parts:\n", n)
-		for _, part := range parts {
-			Ctxt.Logf("\t%v => %v\n", debugInfo.Slots[part.slot], debugInfo.SlotLocsString(part.slot))
-		}
-	}
-
-	// Given a variable that's been decomposed into multiple parts,
-	// its location list may need a new entry after the beginning or
-	// end of every location entry for each of its parts. For example:
-	//
-	// [variable]    [pc range]
-	// string.ptr    |----|-----|    |----|
-	// string.len    |------------|  |--|
-	// ... needs a location list like:
-	// string        |----|-----|-|  |--|-|
-	//
-	// Note that location entries may or may not line up with each other,
-	// and some of the result will only have one or the other part.
-	//
-	// To build the resulting list:
-	// - keep a "current" pointer for each part
-	// - find the next transition point
-	// - advance the current pointer for each part up to that transition point
-	// - build the piece for the range between that transition point and the next
-	// - repeat
-
-	type locID struct {
-		block int
-		loc   int
-	}
-	findLoc := func(part varPart, id locID) *ssa.VarLoc {
-		if id.block >= len(debugInfo.Blocks) {
-			return nil
-		}
-		return debugInfo.Blocks[id.block].Variables[part.slot].Locations[id.loc]
-	}
-	nextLoc := func(part varPart, id locID) (locID, *ssa.VarLoc) {
-		// Check if there's another loc in this block
-		id.loc++
-		if b := debugInfo.Blocks[id.block]; b != nil && id.loc < len(b.Variables[part.slot].Locations) {
-			return id, findLoc(part, id)
-		}
-		// Find the next block that has a loc for this part.
-		id.loc = 0
-		id.block++
-		for ; id.block < len(debugInfo.Blocks); id.block++ {
-			if b := debugInfo.Blocks[id.block]; b != nil && len(b.Variables[part.slot].Locations) != 0 {
-				return id, findLoc(part, id)
-			}
-		}
-		return id, nil
-	}
-	curLoc := make([]locID, len(slots))
-	// Position each pointer at the first entry for its slot.
-	for _, part := range parts {
-		if b := debugInfo.Blocks[0]; b != nil && len(b.Variables[part.slot].Locations) != 0 {
-			// Block 0 has an entry; no need to advance.
-			continue
-		}
-		curLoc[part.slot], _ = nextLoc(part, curLoc[part.slot])
-	}
-
-	// findBoundaryAfter finds the next beginning or end of a piece after currentPC.
-	findBoundaryAfter := func(currentPC int64) int64 {
-		min := int64(math.MaxInt64)
-		for _, part := range parts {
-			// For each part, find the first PC greater than current. Doesn't
-			// matter if it's a start or an end, since we're looking for any boundary.
-			// If it's the new winner, save it.
-		onePart:
-			for i, loc := curLoc[part.slot], findLoc(part, curLoc[part.slot]); loc != nil; i, loc = nextLoc(part, i) {
-				for _, pc := range [2]int64{loc.StartPC, loc.EndPC} {
-					if pc > currentPC {
-						if pc < min {
-							min = pc
-						}
-						break onePart
-					}
-				}
-			}
-		}
-		return min
-	}
-	var start int64
-	end := findBoundaryAfter(0)
-	for {
-		// Advance to the next chunk.
-		start = end
-		end = findBoundaryAfter(start)
-		if end == math.MaxInt64 {
-			break
-		}
-
-		dloc := dwarf.Location{StartPC: start, EndPC: end}
-		if Debug_locationlist != 0 {
-			Ctxt.Logf("Processing range %x -> %x\n", start, end)
-		}
-
-		// Advance curLoc to the last location that starts before/at start.
-		// After this loop, if there's a location that covers [start, end), it will be current.
-		// Otherwise the current piece will be too early.
-		for _, part := range parts {
-			choice := locID{-1, -1}
-			for i, loc := curLoc[part.slot], findLoc(part, curLoc[part.slot]); loc != nil; i, loc = nextLoc(part, i) {
-				if loc.StartPC > start {
-					break //overshot
-				}
-				choice = i // best yet
-			}
-			if choice.block != -1 {
-				curLoc[part.slot] = choice
-			}
-			if Debug_locationlist != 0 {
-				Ctxt.Logf("\t %v => %v", slots[part.slot], curLoc[part.slot])
-			}
-		}
-		if Debug_locationlist != 0 {
-			Ctxt.Logf("\n")
-		}
-		// Assemble the location list entry for this chunk.
-		present := 0
-		for _, part := range parts {
-			dpiece := dwarf.Piece{
-				Length: slots[part.slot].Type.Size(),
-			}
-			loc := findLoc(part, curLoc[part.slot])
-			if loc == nil || start >= loc.EndPC || end <= loc.StartPC {
-				if Debug_locationlist != 0 {
-					Ctxt.Logf("\t%v: missing", slots[part.slot])
-				}
-				dpiece.Missing = true
-				dloc.Pieces = append(dloc.Pieces, dpiece)
-				continue
-			}
-			present++
-			if Debug_locationlist != 0 {
-				Ctxt.Logf("\t%v: %v", slots[part.slot], debugInfo.Blocks[curLoc[part.slot].block].LocString(loc))
-			}
-			if loc.OnStack {
-				dpiece.OnStack = true
-				dpiece.StackOffset = stackOffset(slots[loc.StackLocation])
-			} else {
-				for reg := 0; reg < len(debugInfo.Registers); reg++ {
-					if loc.Registers&(1<<uint8(reg)) != 0 {
-						dpiece.RegNum = Ctxt.Arch.DWARFRegisters[debugInfo.Registers[reg].ObjNum()]
-					}
-				}
-			}
-			dloc.Pieces = append(dloc.Pieces, dpiece)
-		}
-		if present == 0 {
-			if Debug_locationlist != 0 {
-				Ctxt.Logf(" -> totally missing\n")
-			}
-			continue
-		}
-		// Extend the previous entry if possible.
-		if len(dvar.LocationList) > 0 {
-			prev := &dvar.LocationList[len(dvar.LocationList)-1]
-			if prev.EndPC == dloc.StartPC && len(prev.Pieces) == len(dloc.Pieces) {
-				equal := true
-				for i := range prev.Pieces {
-					if prev.Pieces[i] != dloc.Pieces[i] {
-						equal = false
-					}
-				}
-				if equal {
-					prev.EndPC = end
-					if Debug_locationlist != 0 {
-						Ctxt.Logf("-> merged with previous, now %#v\n", prev)
-					}
-					continue
-				}
-			}
-		}
-		dvar.LocationList = append(dvar.LocationList, dloc)
-		if Debug_locationlist != 0 {
-			Ctxt.Logf("-> added: %#v\n", dloc)
+	list := debug.LocationLists[varID]
+	if len(list) != 0 {
+		dvar.PutLocationList = func(listSym, startPC dwarf.Sym) {
+			debug.PutLocationList(list, Ctxt, listSym.(*obj.LSym), startPC.(*obj.LSym))
 		}
 	}
 	return dvar