cmd/compile: convert merge to use applicative balanced trees for sharing

This CL replaces a not-very-shared linear-sized set
representation with a much more shared representation.
For the annoying test program in question, it reduces
the heap size by 95%, and the time slightly.

However, for some programs build time is longer.

This also includes at least one bug fix for problems
uncovered while ensuring compatibility with what it
replaces.

Fixes #51543.

Change-Id: Ie7a4c6ea460775faeed2b0378ab21ddffd15badc
Reviewed-on: https://go-review.googlesource.com/c/go/+/397318
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
This commit is contained in:
David Chase 2022-03-29 13:16:35 -04:00
parent d339d085c9
commit 857cda4625
4 changed files with 1950 additions and 253 deletions

View file

@ -6,6 +6,7 @@ package ssa
import (
"cmd/compile/internal/abi"
"cmd/compile/internal/abt"
"cmd/compile/internal/ir"
"cmd/compile/internal/types"
"cmd/internal/dwarf"
@ -23,8 +24,8 @@ type SlotID int32
type VarID int32
// A FuncDebug contains all the debug information for the variables in a
// function. Variables are identified by their LocalSlot, which may be the
// result of decomposing a larger variable.
// function. Variables are identified by their LocalSlot, which may be
// the result of decomposing a larger variable.
type FuncDebug struct {
// Slots is all the slots used in the debug info, indexed by their SlotID.
Slots []LocalSlot
@ -43,27 +44,37 @@ type FuncDebug struct {
}
type BlockDebug struct {
// State at the start and end of the block. These are initialized,
// and updated from new information that flows on back edges.
startState, endState abt.T
// Use these to avoid excess work in the merge. If none of the
// predecessors has changed since the last check, the old answer is
// still good.
lastCheckedTime, lastChangedTime int32
// Whether the block had any changes to user variables at all.
relevant bool
// State at the end of the block if it's fully processed. Immutable once initialized.
endState []liveSlot
// false until the block has been processed at least once. This
// affects how the merge is done; the goal is to maximize sharing
// and avoid allocation.
everProcessed bool
}
// A liveSlot is a slot that's live in loc at entry/exit of a block.
type liveSlot struct {
// An inlined VarLoc, so it packs into 16 bytes instead of 20.
Registers RegisterSet
StackOffset
VarLoc
}
slot SlotID
func (ls *liveSlot) String() string {
return fmt.Sprintf("0x%x.%d.%d", ls.Registers, ls.stackOffsetValue(), int32(ls.StackOffset)&1)
}
func (loc liveSlot) absent() bool {
return loc.Registers == 0 && !loc.onStack()
}
// StackOffset encodes whether a value is on the stack and if so, where. It is
// a 31-bit integer followed by a presence flag at the low-order bit.
// StackOffset encodes whether a value is on the stack and if so, where.
// It is a 31-bit integer followed by a presence flag at the low-order
// bit.
type StackOffset int32
func (s StackOffset) onStack() bool {
@ -83,7 +94,7 @@ type stateAtPC struct {
}
// reset fills state with the live variables from live.
func (state *stateAtPC) reset(live []liveSlot) {
func (state *stateAtPC) reset(live abt.T) {
slots, registers := state.slots, state.registers
for i := range slots {
slots[i] = VarLoc{}
@ -91,13 +102,15 @@ func (state *stateAtPC) reset(live []liveSlot) {
for i := range registers {
registers[i] = registers[i][:0]
}
for _, live := range live {
slots[live.slot] = VarLoc{live.Registers, live.StackOffset}
if live.Registers == 0 {
for it := live.Iterator(); !it.Done(); {
k, d := it.Next()
live := d.(*liveSlot)
slots[k] = live.VarLoc
if live.VarLoc.Registers == 0 {
continue
}
mask := uint64(live.Registers)
mask := uint64(live.VarLoc.Registers)
for {
if mask == 0 {
break
@ -105,7 +118,7 @@ func (state *stateAtPC) reset(live []liveSlot) {
reg := uint8(bits.TrailingZeros64(mask))
mask &^= 1 << reg
registers[reg] = append(registers[reg], live.slot)
registers[reg] = append(registers[reg], SlotID(k))
}
}
state.slots, state.registers = slots, registers
@ -118,7 +131,7 @@ func (s *debugState) LocString(loc VarLoc) string {
var storage []string
if loc.onStack() {
storage = append(storage, "stack")
storage = append(storage, fmt.Sprintf("@%+d", loc.stackOffsetValue()))
}
mask := uint64(loc.Registers)
@ -147,6 +160,14 @@ func (loc VarLoc) absent() bool {
return loc.Registers == 0 && !loc.onStack()
}
// intersect returns the part of loc that is also present in other: the
// registers common to both, and loc's stack offset only when both
// locations are on the stack at the same offset. loc is received by
// value, so the caller's VarLoc is not modified.
func (loc VarLoc) intersect(other VarLoc) VarLoc {
if !loc.onStack() || !other.onStack() || loc.StackOffset != other.StackOffset {
// No shared stack home: zero the offset, which also clears the
// on-stack presence flag encoded in StackOffset.
loc.StackOffset = 0
}
// Keep only the registers that appear in both locations.
loc.Registers &= other.Registers
return loc
}
var BlockStart = &Value{
ID: -10000,
Op: OpInvalid,
@ -168,8 +189,9 @@ var FuncEnd = &Value{
// RegisterSet is a bitmap of registers, indexed by Register.num.
type RegisterSet uint64
// logf prints debug-specific logging to stdout (always stdout) if the current
// function is tagged by GOSSAFUNC (for ssa output directed either to stdout or html).
// logf prints debug-specific logging to stdout (always stdout) if the
// current function is tagged by GOSSAFUNC (for ssa output directed
// either to stdout or html).
func (s *debugState) logf(msg string, args ...interface{}) {
if s.f.PrintOrHtmlSSA {
fmt.Printf(msg, args...)
@ -186,29 +208,28 @@ type debugState struct {
// The user variable that each slot rolls up to, indexed by SlotID.
slotVars []VarID
f *Func
loggingEnabled bool
registers []Register
stackOffset func(LocalSlot) int32
ctxt *obj.Link
f *Func
loggingLevel int
convergeCount int // testing; iterate over block debug state this many times
registers []Register
stackOffset func(LocalSlot) int32
ctxt *obj.Link
// The names (slots) associated with each value, indexed by Value ID.
valueNames [][]SlotID
// The current state of whatever analysis is running.
currentState stateAtPC
liveCount []int
changedVars *sparseSet
changedSlots *sparseSet
// The pending location list entry for each user variable, indexed by VarID.
pendingEntries []pendingEntry
varParts map[*ir.Name][]SlotID
blockDebug []BlockDebug
pendingSlotLocs []VarLoc
liveSlots []liveSlot
liveSlotSliceBegin int
partsByVarOffset sort.Interface
varParts map[*ir.Name][]SlotID
blockDebug []BlockDebug
pendingSlotLocs []VarLoc
partsByVarOffset sort.Interface
}
func (state *debugState) initializeCache(f *Func, numVars, numSlots int) {
@ -247,15 +268,9 @@ func (state *debugState) initializeCache(f *Func, numVars, numSlots int) {
state.currentState.registers = state.currentState.registers[:len(state.registers)]
}
// Used many times by mergePredecessors.
if cap(state.liveCount) < numSlots {
state.liveCount = make([]int, numSlots)
} else {
state.liveCount = state.liveCount[:numSlots]
}
// A relatively small slice, but used many times as the return from processValue.
state.changedVars = newSparseSet(numVars)
state.changedSlots = newSparseSet(numSlots)
// A pending entry per user variable, with space to track each of its pieces.
numPieces := 0
@ -291,25 +306,12 @@ func (state *debugState) initializeCache(f *Func, numVars, numSlots int) {
state.lists[i] = nil
}
}
state.liveSlots = state.liveSlots[:0]
state.liveSlotSliceBegin = 0
}
// allocBlock returns a pointer to the BlockDebug entry for b, taken from
// state.blockDebug and indexed by b.ID.
func (state *debugState) allocBlock(b *Block) *BlockDebug {
return &state.blockDebug[b.ID]
}
func (state *debugState) appendLiveSlot(ls liveSlot) {
state.liveSlots = append(state.liveSlots, ls)
}
func (state *debugState) getLiveSlotSlice() []liveSlot {
s := state.liveSlots[state.liveSlotSliceBegin:]
state.liveSlotSliceBegin = len(state.liveSlots)
return s
}
func (s *debugState) blockEndStateString(b *BlockDebug) string {
endState := stateAtPC{slots: make([]VarLoc, len(s.slots)), registers: make([][]SlotID, len(s.registers))}
endState.reset(b.endState)
@ -550,15 +552,21 @@ func PopulateABIInRegArgOps(f *Func) {
f.Entry.Values = append(newValues, f.Entry.Values...)
}
// BuildFuncDebug debug information for f, placing the results in "rval".
// f must be fully processed, so that each Value is where it will be when
// machine code is emitted.
func BuildFuncDebug(ctxt *obj.Link, f *Func, loggingEnabled bool, stackOffset func(LocalSlot) int32, rval *FuncDebug) {
// BuildFuncDebug builds debug information for f, placing the results
// in "rval". f must be fully processed, so that each Value is where it
// will be when machine code is emitted.
func BuildFuncDebug(ctxt *obj.Link, f *Func, loggingLevel int, stackOffset func(LocalSlot) int32, rval *FuncDebug) {
if f.RegAlloc == nil {
f.Fatalf("BuildFuncDebug on func %v that has not been fully processed", f)
}
state := &f.Cache.debugState
state.loggingEnabled = loggingEnabled
state.loggingLevel = loggingLevel % 1000
// A specific number demands exactly that many iterations. Under
// particular circumstances it may require more than the total of
// 2 passes implied by a single run through liveness and a single
// run through location list generation.
state.convergeCount = loggingLevel / 1000
state.f = f
state.registers = f.Config.registers
state.stackOffset = stackOffset
@ -568,7 +576,7 @@ func BuildFuncDebug(ctxt *obj.Link, f *Func, loggingEnabled bool, stackOffset fu
PopulateABIInRegArgOps(f)
}
if state.loggingEnabled {
if state.loggingLevel > 0 {
state.logf("Generating location lists for function %q\n", f.Name)
}
@ -674,242 +682,372 @@ func BuildFuncDebug(ctxt *obj.Link, f *Func, loggingEnabled bool, stackOffset fu
// and end state of each block.
func (state *debugState) liveness() []*BlockDebug {
blockLocs := make([]*BlockDebug, state.f.NumBlocks())
counterTime := int32(1)
// Reverse postorder: visit a block after as many as possible of its
// predecessors have been visited.
po := state.f.Postorder()
for i := len(po) - 1; i >= 0; i-- {
b := po[i]
converged := false
// Build the starting state for the block from the final
// state of its predecessors.
startState, startValid := state.mergePredecessors(b, blockLocs, nil)
changed := false
if state.loggingEnabled {
state.logf("Processing %v, initial state:\n%v", b, state.stateString(state.currentState))
// The iteration rule is that by default, run until converged, but
// if a particular iteration count is specified, run that many
// iterations, no more, no less. A count is specified as the
// thousands digit of the location lists debug flag,
// e.g. -d=locationlists=4000
keepGoing := func(k int) bool {
if state.convergeCount == 0 {
return !converged
}
return k < state.convergeCount
}
for k := 0; keepGoing(k); k++ {
if state.loggingLevel > 0 {
state.logf("Liveness pass %d\n", k)
}
converged = true
for i := len(po) - 1; i >= 0; i-- {
b := po[i]
locs := blockLocs[b.ID]
if locs == nil {
locs = state.allocBlock(b)
blockLocs[b.ID] = locs
}
// Update locs/registers with the effects of each Value.
for _, v := range b.Values {
slots := state.valueNames[v.ID]
// Build the starting state for the block from the final
// state of its predecessors.
startState, blockChanged := state.mergePredecessors(b, blockLocs, nil, false)
locs.lastCheckedTime = counterTime
counterTime++
if state.loggingLevel > 1 {
state.logf("Processing %v, block changed %v, initial state:\n%v", b, blockChanged, state.stateString(state.currentState))
}
// Loads and stores inherit the names of their sources.
var source *Value
switch v.Op {
case OpStoreReg:
source = v.Args[0]
case OpLoadReg:
switch a := v.Args[0]; a.Op {
case OpArg, OpPhi:
source = a
case OpStoreReg:
source = a.Args[0]
default:
if state.loggingEnabled {
state.logf("at %v: load with unexpected source op: %v (%v)\n", v, a.Op, a)
if blockChanged {
// If the start did not change, then the old endState is good
converged = false
changed := false
state.changedSlots.clear()
// Update locs/registers with the effects of each Value.
for _, v := range b.Values {
slots := state.valueNames[v.ID]
// Loads and stores inherit the names of their sources.
var source *Value
switch v.Op {
case OpStoreReg:
source = v.Args[0]
case OpLoadReg:
switch a := v.Args[0]; a.Op {
case OpArg, OpPhi:
source = a
case OpStoreReg:
source = a.Args[0]
default:
if state.loggingLevel > 1 {
state.logf("at %v: load with unexpected source op: %v (%v)\n", v, a.Op, a)
}
}
}
// Update valueNames with the source so that later steps
// don't need special handling.
if source != nil && k == 0 {
// limit to k == 0 otherwise there are duplicates.
slots = append(slots, state.valueNames[source.ID]...)
state.valueNames[v.ID] = slots
}
reg, _ := state.f.getHome(v.ID).(*Register)
c := state.processValue(v, slots, reg)
changed = changed || c
}
}
// Update valueNames with the source so that later steps
// don't need special handling.
if source != nil {
slots = append(slots, state.valueNames[source.ID]...)
state.valueNames[v.ID] = slots
}
reg, _ := state.f.getHome(v.ID).(*Register)
c := state.processValue(v, slots, reg)
changed = changed || c
}
if state.loggingEnabled {
state.f.Logf("Block %v done, locs:\n%v", b, state.stateString(state.currentState))
}
locs := state.allocBlock(b)
locs.relevant = changed
if !changed && startValid {
locs.endState = startState
} else {
for slotID, slotLoc := range state.currentState.slots {
if slotLoc.absent() {
continue
if state.loggingLevel > 1 {
state.logf("Block %v done, locs:\n%v", b, state.stateString(state.currentState))
}
state.appendLiveSlot(liveSlot{slot: SlotID(slotID), Registers: slotLoc.Registers, StackOffset: slotLoc.StackOffset})
locs.relevant = locs.relevant || changed
if !changed {
locs.endState = startState
} else {
for _, id := range state.changedSlots.contents() {
slotID := SlotID(id)
slotLoc := state.currentState.slots[slotID]
if slotLoc.absent() {
startState.Delete(int32(slotID))
continue
}
old := startState.Find(int32(slotID)) // do NOT replace existing values
if oldLS, ok := old.(*liveSlot); !ok || oldLS.VarLoc != slotLoc {
startState.Insert(int32(slotID),
&liveSlot{VarLoc: slotLoc})
}
}
locs.endState = startState
}
locs.lastChangedTime = counterTime
}
locs.endState = state.getLiveSlotSlice()
counterTime++
}
blockLocs[b.ID] = locs
}
return blockLocs
}
// mergePredecessors takes the end state of each of b's predecessors and
// intersects them to form the starting state for b. It puts that state in
// blockLocs, and fills state.currentState with it. If convenient, it returns
// a reused []liveSlot, true that represents the starting state.
// If previousBlock is non-nil, it registers changes vs. that block's end
// state in state.changedVars. Note that previousBlock will often not be a
// predecessor.
func (state *debugState) mergePredecessors(b *Block, blockLocs []*BlockDebug, previousBlock *Block) ([]liveSlot, bool) {
// intersects them to form the starting state for b. It puts that state
// in blockLocs[b.ID].startState, and fills state.currentState with it.
// It returns the start state and whether this is changed from the
// previously approximated value of startState for this block. After
// the first call, subsequent calls can only shrink startState.
//
// Passing forLocationLists=true enables additional side-effects that
// are necessary for building location lists but superfluous while still
// iterating to an answer.
//
// If previousBlock is non-nil, it registers changes vs. that block's
// end state in state.changedVars. Note that previousBlock will often
// not be a predecessor.
//
// Note that mergePredecessors behaves slightly differently between
// first and subsequent calls for a block. For the first call, the
// starting state is approximated by taking the state from the
// predecessor whose state is smallest, and removing any elements not
// in all the other predecessors; this makes the smallest number of
// changes and shares the most state. On subsequent calls the old
// value of startState is adjusted with new information; this is judged
// to do the least amount of extra work.
//
// To improve performance, each block's state information is marked with
// lastChanged and lastChecked "times" so unchanged predecessors can be
// skipped on after-the-first iterations. Doing this allows extra
// iterations by the caller to be almost free.
//
// It is important to know that the set representation used for
// startState, endState, and merges can share data for two sets where
// one is a small delta from the other. Doing this does require a
// little care in how sets are updated, both in mergePredecessors, and
// using its result.
func (state *debugState) mergePredecessors(b *Block, blockLocs []*BlockDebug, previousBlock *Block, forLocationLists bool) (abt.T, bool) {
// Filter out back branches.
var predsBuf [10]*Block
preds := predsBuf[:0]
locs := blockLocs[b.ID]
blockChanged := !locs.everProcessed // the first time it always changes.
updating := locs.everProcessed
// For the first merge, exclude predecessors that have not been seen yet.
// I.e., backedges.
for _, pred := range b.Preds {
if blockLocs[pred.b.ID] != nil {
if bl := blockLocs[pred.b.ID]; bl != nil && bl.everProcessed {
// crucially, a self-edge has bl != nil, but bl.everProcessed is false the first time.
preds = append(preds, pred.b)
}
}
if state.loggingEnabled {
locs.everProcessed = true
if state.loggingLevel > 1 {
// The logf below would cause preds to be heap-allocated if
// it were passed directly.
preds2 := make([]*Block, len(preds))
copy(preds2, preds)
state.logf("Merging %v into %v\n", preds2, b)
state.logf("Merging %v into %v (changed=%d, checked=%d)\n", preds2, b, locs.lastChangedTime, locs.lastCheckedTime)
}
// TODO all the calls to this are overkill; only need to do this for slots that are not present in the merge.
markChangedVars := func(slots []liveSlot) {
for _, live := range slots {
state.changedVars.add(ID(state.slotVars[live.slot]))
state.changedVars.clear()
markChangedVars := func(slots, merged abt.T) {
if !forLocationLists {
return
}
// Fill changedVars with those that differ between the previous
// block (in the emit order, not necessarily a flow predecessor)
// and the start state for this block.
for it := slots.Iterator(); !it.Done(); {
k, v := it.Next()
m := merged.Find(k)
if m == nil || v.(*liveSlot).VarLoc != m.(*liveSlot).VarLoc {
state.changedVars.add(ID(state.slotVars[k]))
}
}
}
reset := func(ourStartState abt.T) {
if !(forLocationLists || blockChanged) {
// there is no change and this is not for location lists, do
// not bother to reset currentState because it will not be
// examined.
return
}
state.currentState.reset(ourStartState)
}
// Zero predecessors
if len(preds) == 0 {
if previousBlock != nil {
// Mark everything in previous block as changed because it is not a predecessor.
markChangedVars(blockLocs[previousBlock.ID].endState)
state.f.Fatalf("Function %v, block %s with no predecessors is not first block, has previous %s", state.f, b.String(), previousBlock.String())
}
state.currentState.reset(nil)
return nil, true
// startState is empty
reset(abt.T{})
return abt.T{}, blockChanged
}
p0 := blockLocs[preds[0].ID].endState
// One predecessor
l0 := blockLocs[preds[0].ID]
p0 := l0.endState
if len(preds) == 1 {
if previousBlock != nil && preds[0].ID != previousBlock.ID {
// Mark everything in previous block as changed because it is not a predecessor.
markChangedVars(blockLocs[previousBlock.ID].endState)
// Change from previous block is its endState minus the predecessor's endState
markChangedVars(blockLocs[previousBlock.ID].endState, p0)
}
locs.startState = p0
blockChanged = blockChanged || l0.lastChangedTime > locs.lastCheckedTime
reset(p0)
return p0, blockChanged
}
// More than one predecessor
if updating {
// After the first approximation, i.e., when updating, results
// can only get smaller, because initially backedge
// predecessors do not participate in the intersection. This
// means that for the update, given the prior approximation of
// startState, there is no need to re-intersect with unchanged
// blocks. Therefore remove unchanged blocks from the
// predecessor list.
for i := len(preds) - 1; i >= 0; i-- {
pred := preds[i]
if blockLocs[pred.ID].lastChangedTime > locs.lastCheckedTime {
continue // keep this predecessor
}
preds[i] = preds[len(preds)-1]
preds = preds[:len(preds)-1]
if state.loggingLevel > 2 {
state.logf("Pruned b%d, lastChanged was %d but b%d lastChecked is %d\n", pred.ID, blockLocs[pred.ID].lastChangedTime, b.ID, locs.lastCheckedTime)
}
}
// Check for an early out; this should always hit for the update
// if there are no cycles.
if len(preds) == 0 {
blockChanged = false
reset(locs.startState)
if state.loggingLevel > 2 {
state.logf("Early out, no predecessors changed since last check\n")
}
if previousBlock != nil {
markChangedVars(blockLocs[previousBlock.ID].endState, locs.startState)
}
return locs.startState, blockChanged
}
state.currentState.reset(p0)
return p0, true
}
baseID := preds[0].ID
baseState := p0
// If previous block is not a predecessor, its location information changes at boundary with this block.
previousBlockIsNotPredecessor := previousBlock != nil // If it's nil, no info to change.
// Choose the predecessor with the smallest endState for intersection work
for _, pred := range preds[1:] {
if blockLocs[pred.ID].endState.Size() < baseState.Size() {
baseState = blockLocs[pred.ID].endState
baseID = pred.ID
}
}
if previousBlock != nil {
// Try to use previousBlock as the base state
// if possible.
for _, pred := range preds[1:] {
if pred.ID == previousBlock.ID {
baseID = pred.ID
baseState = blockLocs[pred.ID].endState
previousBlockIsNotPredecessor = false
if state.loggingLevel > 2 {
state.logf("Starting %v with state from b%v:\n%v", b, baseID, state.blockEndStateString(blockLocs[baseID]))
for _, pred := range preds {
if pred.ID == baseID {
continue
}
state.logf("Merging in state from %v:\n%v", pred, state.blockEndStateString(blockLocs[pred.ID]))
}
}
state.currentState.reset(abt.T{})
// The normal logic of "reset" is included in the intersection loop below.
slotLocs := state.currentState.slots
// If this is the first call, do updates on the "baseState"; if this
// is a subsequent call, tweak the startState instead. Note that
// these "set" values are values; there are no side effects to
// other values as these are modified.
newState := baseState
if updating {
newState = blockLocs[b.ID].startState
}
for it := newState.Iterator(); !it.Done(); {
k, d := it.Next()
thisSlot := d.(*liveSlot)
x := thisSlot.VarLoc
x0 := x // initial value in newState
// Intersect this slot with the slot in all the predecessors
for _, other := range preds {
if !updating && other.ID == baseID {
continue
}
otherSlot := blockLocs[other.ID].endState.Find(k)
if otherSlot == nil {
x = VarLoc{}
break
}
y := otherSlot.(*liveSlot).VarLoc
x = x.intersect(y)
if x.absent() {
x = VarLoc{}
break
}
}
}
if state.loggingEnabled {
state.logf("Starting %v with state from b%v:\n%v", b, baseID, state.blockEndStateString(blockLocs[baseID]))
}
slotLocs := state.currentState.slots
for _, predSlot := range baseState {
slotLocs[predSlot.slot] = VarLoc{predSlot.Registers, predSlot.StackOffset}
state.liveCount[predSlot.slot] = 1
}
for _, pred := range preds {
if pred.ID == baseID {
continue
}
if state.loggingEnabled {
state.logf("Merging in state from %v:\n%v", pred, state.blockEndStateString(blockLocs[pred.ID]))
}
for _, predSlot := range blockLocs[pred.ID].endState {
state.liveCount[predSlot.slot]++
liveLoc := slotLocs[predSlot.slot]
if !liveLoc.onStack() || !predSlot.onStack() || liveLoc.StackOffset != predSlot.StackOffset {
liveLoc.StackOffset = 0
// Delete if necessary, but not otherwise (in order to maximize sharing).
if x.absent() {
if !x0.absent() {
blockChanged = true
newState.Delete(k)
}
liveLoc.Registers &= predSlot.Registers
slotLocs[predSlot.slot] = liveLoc
}
}
// Check if the final state is the same as the first predecessor's
// final state, and reuse it if so. In principle it could match any,
// but it's probably not worth checking more than the first.
unchanged := true
for _, predSlot := range baseState {
if state.liveCount[predSlot.slot] != len(preds) ||
slotLocs[predSlot.slot].Registers != predSlot.Registers ||
slotLocs[predSlot.slot].StackOffset != predSlot.StackOffset {
unchanged = false
break
}
}
if unchanged {
if state.loggingEnabled {
state.logf("After merge, %v matches b%v exactly.\n", b, baseID)
}
if previousBlockIsNotPredecessor {
// Mark everything in previous block as changed because it is not a predecessor.
markChangedVars(blockLocs[previousBlock.ID].endState)
}
state.currentState.reset(baseState)
return baseState, true
}
for reg := range state.currentState.registers {
state.currentState.registers[reg] = state.currentState.registers[reg][:0]
}
// A slot is live if it was seen in all predecessors, and they all had
// some storage in common.
for _, predSlot := range baseState {
slotLoc := slotLocs[predSlot.slot]
if state.liveCount[predSlot.slot] != len(preds) {
// Seen in only some predecessors. Clear it out.
slotLocs[predSlot.slot] = VarLoc{}
slotLocs[k] = VarLoc{}
continue
}
if x != x0 {
blockChanged = true
newState.Insert(k, &liveSlot{VarLoc: x})
}
// Present in all predecessors.
mask := uint64(slotLoc.Registers)
slotLocs[k] = x
mask := uint64(x.Registers)
for {
if mask == 0 {
break
}
reg := uint8(bits.TrailingZeros64(mask))
mask &^= 1 << reg
state.currentState.registers[reg] = append(state.currentState.registers[reg], predSlot.slot)
state.currentState.registers[reg] = append(state.currentState.registers[reg], SlotID(k))
}
}
if previousBlockIsNotPredecessor {
// Mark everything in previous block as changed because it is not a predecessor.
markChangedVars(blockLocs[previousBlock.ID].endState)
if previousBlock != nil {
markChangedVars(blockLocs[previousBlock.ID].endState, newState)
}
return nil, false
locs.startState = newState
return newState, blockChanged
}
// processValue updates locs and state.registerContents to reflect v, a value with
// the names in vSlots and homed in vReg. "v" becomes visible after execution of
// the instructions evaluating it. It returns which VarIDs were modified by the
// Value's execution.
// processValue updates locs and state.registerContents to reflect v, a
// value with the names in vSlots and homed in vReg. "v" becomes
// visible after execution of the instructions evaluating it. It
// reports whether the Value's execution changed any slot location.
func (state *debugState) processValue(v *Value, vSlots []SlotID, vReg *Register) bool {
locs := state.currentState
changed := false
setSlot := func(slot SlotID, loc VarLoc) {
changed = true
state.changedVars.add(ID(state.slotVars[slot]))
state.changedSlots.add(ID(slot))
state.currentState.slots[slot] = loc
}
@ -925,7 +1063,7 @@ func (state *debugState) processValue(v *Value, vSlots []SlotID, vReg *Register)
clobbers &^= 1 << reg
for _, slot := range locs.registers[reg] {
if state.loggingEnabled {
if state.loggingLevel > 1 {
state.logf("at %v: %v clobbered out of %v\n", v, state.slots[slot], &state.registers[reg])
}
@ -954,7 +1092,7 @@ func (state *debugState) processValue(v *Value, vSlots []SlotID, vReg *Register)
stackOffset = StackOffset(state.stackOffset(state.slots[slotID])<<1 | 1)
}
setSlot(slotID, VarLoc{0, stackOffset})
if state.loggingEnabled {
if state.loggingLevel > 1 {
if v.Op == OpVarDef {
state.logf("at %v: stack-only var %v now live\n", v, state.slots[slotID])
} else {
@ -966,7 +1104,7 @@ func (state *debugState) processValue(v *Value, vSlots []SlotID, vReg *Register)
home := state.f.getHome(v.ID).(LocalSlot)
stackOffset := state.stackOffset(home)<<1 | 1
for _, slot := range vSlots {
if state.loggingEnabled {
if state.loggingLevel > 1 {
state.logf("at %v: arg %v now on stack in location %v\n", v, state.slots[slot], home)
if last := locs.slots[slot]; !last.absent() {
state.logf("at %v: unexpected arg op on already-live slot %v\n", v, state.slots[slot])
@ -982,20 +1120,20 @@ func (state *debugState) processValue(v *Value, vSlots []SlotID, vReg *Register)
for _, slot := range vSlots {
last := locs.slots[slot]
if last.absent() {
if state.loggingEnabled {
if state.loggingLevel > 1 {
state.logf("at %v: unexpected spill of unnamed register %s\n", v, vReg)
}
break
}
setSlot(slot, VarLoc{last.Registers, StackOffset(stackOffset)})
if state.loggingEnabled {
state.logf("at %v: %v spilled to stack location %v\n", v, state.slots[slot], home)
if state.loggingLevel > 1 {
state.logf("at %v: %v spilled to stack location %v@%d\n", v, state.slots[slot], home, state.stackOffset(home))
}
}
case vReg != nil:
if state.loggingEnabled {
if state.loggingLevel > 1 {
newSlots := make([]bool, len(state.slots))
for _, slot := range vSlots {
newSlots[slot] = true
@ -1015,7 +1153,7 @@ func (state *debugState) processValue(v *Value, vSlots []SlotID, vReg *Register)
locs.registers[vReg.num] = locs.registers[vReg.num][:0]
locs.registers[vReg.num] = append(locs.registers[vReg.num], vSlots...)
for _, slot := range vSlots {
if state.loggingEnabled {
if state.loggingLevel > 1 {
state.logf("at %v: %v now in %s\n", v, state.slots[slot], vReg)
}
@ -1067,8 +1205,10 @@ func (e *pendingEntry) clear() {
}
}
// canMerge reports whether the location description for new is the same as
// pending.
// canMerge reports whether a new location description is a superset
// of the (non-empty) pending location description; if so, the two
// can be merged (i.e., pending is still a valid and useful location
// description).
func canMerge(pending, new VarLoc) bool {
if pending.absent() && new.absent() {
return true
@ -1076,13 +1216,18 @@ func canMerge(pending, new VarLoc) bool {
if pending.absent() || new.absent() {
return false
}
if pending.onStack() {
return pending.StackOffset == new.StackOffset
// pending is not absent, therefore it has either a stack mapping,
// or registers, or both.
if pending.onStack() && pending.StackOffset != new.StackOffset {
// if pending has a stack offset, then new must also, and it
// must be the same (StackOffset encodes onStack).
return false
}
if pending.Registers != 0 && new.Registers != 0 {
return firstReg(pending.Registers) == firstReg(new.Registers)
if pending.Registers&new.Registers != pending.Registers {
// There is at least one register in pending not mentioned in new.
return false
}
return false
return true
}
// firstReg returns the first register in set that is present.
@ -1095,24 +1240,26 @@ func firstReg(set RegisterSet) uint8 {
return uint8(bits.TrailingZeros64(uint64(set)))
}
// buildLocationLists builds location lists for all the user variables in
// state.f, using the information about block state in blockLocs.
// The returned location lists are not fully complete. They are in terms of
// SSA values rather than PCs, and have no base address/end entries. They will
// be finished by PutLocationList.
// buildLocationLists builds location lists for all the user variables
// in state.f, using the information about block state in blockLocs.
// The returned location lists are not fully complete. They are in
// terms of SSA values rather than PCs, and have no base address/end
// entries. They will be finished by PutLocationList.
func (state *debugState) buildLocationLists(blockLocs []*BlockDebug) {
// Run through the function in program text order, building up location
// lists as we go. The heavy lifting has mostly already been done.
var prevBlock *Block
for _, b := range state.f.Blocks {
state.mergePredecessors(b, blockLocs, prevBlock)
state.mergePredecessors(b, blockLocs, prevBlock, true)
// Handle any differences among predecessor blocks and previous block (perhaps not a predecessor)
for _, varID := range state.changedVars.contents() {
state.updateVar(VarID(varID), b, BlockStart)
}
state.changedVars.clear()
if !blockLocs[b.ID].relevant {
// Handle any differences among predecessor blocks and previous block (perhaps not a predecessor)
for _, varID := range state.changedVars.contents() {
state.updateVar(VarID(varID), b, BlockStart)
}
continue
}
@ -1213,7 +1360,7 @@ func (state *debugState) buildLocationLists(blockLocs []*BlockDebug) {
prevBlock = b
}
if state.loggingEnabled {
if state.loggingLevel > 0 {
state.logf("location lists:\n")
}
@ -1221,7 +1368,7 @@ func (state *debugState) buildLocationLists(blockLocs []*BlockDebug) {
for varID := range state.lists {
state.writePendingEntry(VarID(varID), state.f.Blocks[len(state.f.Blocks)-1].ID, FuncEnd.ID)
list := state.lists[varID]
if state.loggingEnabled {
if state.loggingLevel > 0 {
if len(list) == 0 {
state.logf("\t%v : empty list\n", state.vars[varID])
} else {
@ -1292,9 +1439,10 @@ func (state *debugState) writePendingEntry(varID VarID, endBlock, endValue ID) {
return
}
if start == end {
if state.loggingEnabled {
if state.loggingLevel > 1 {
// Printf not logf so not gated by GOSSAFUNC; this should fire very rarely.
fmt.Printf("Skipping empty location list for %v in %s\n", state.vars[varID], state.f.Name)
// TODO this fires a lot, need to figure out why.
state.logf("Skipping empty location list for %v in %s\n", state.vars[varID], state.f.Name)
}
return
}
@ -1307,7 +1455,7 @@ func (state *debugState) writePendingEntry(varID VarID, endBlock, endValue ID) {
sizeIdx := len(list)
list = list[:len(list)+2]
if state.loggingEnabled {
if state.loggingLevel > 1 {
var partStrs []string
for i, slot := range state.varSlots[varID] {
partStrs = append(partStrs, fmt.Sprintf("%v@%v", state.slots[slot], state.LocString(pending.pieces[i])))
@ -1389,11 +1537,11 @@ func (debugInfo *FuncDebug) PutLocationList(list []byte, ctxt *obj.Link, listSym
listSym.WriteInt(ctxt, listSym.Size, ctxt.Arch.PtrSize, 0)
}
// Pack a value and block ID into an address-sized uint, returning encoded
// value and boolean indicating whether the encoding succeeded. For
// 32-bit architectures the process may fail for very large procedures
// (the theory being that it's ok to have degraded debug quality in
// this case).
// Pack a value and block ID into an address-sized uint, returning
// encoded value and boolean indicating whether the encoding succeeded.
// For 32-bit architectures the process may fail for very large
// procedures (the theory being that it's ok to have degraded debug
// quality in this case).
func encodeValue(ctxt *obj.Link, b, v ID) (uint64, bool) {
if ctxt.Arch.PtrSize == 8 {
result := uint64(b)<<32 | uint64(uint32(v))