cmd/compile: optimize liveness in stackalloc
The stackalloc code needs to run a liveness pass to build the
interference graph between stack slots. Because the values that we need
liveness over are so sparse, we can optimize the analysis by using a
path-exploration algorithm rather than an iterative dataflow one.

In local testing, this cuts 74.05 ms of CPU time off a build of
cmd/compile.

Change-Id: I765ace87d5e8aae177e65eb63da482e3d698bea7
Reviewed-on: https://go-review.googlesource.com/c/go/+/718540
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
parent 956909ff84
commit 4e761b9a18

1 changed file with 108 additions and 73 deletions
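For readers new to the technique: classic liveness iterates a dataflow fixpoint over every block until the live sets stabilize, whereas path exploration walks backward from each value's use blocks toward its defining block, visiting only blocks the value can actually be live across. Below is a minimal, self-contained sketch of that backward walk; the `Block` type, the `defBlock`/`useBlocks` maps, and the names here are simplified stand-ins for illustration, not the compiler's actual `ssa` package types.

```go
package main

import "fmt"

// Block is a simplified stand-in for a CFG node (not the compiler's ssa.Block).
type Block struct {
	ID    int
	Preds []*Block
}

// computeLiveOut returns, per block ID, the value IDs live at the end of
// that block, found by exploring paths backward from each use block to
// the value's defining block.
func computeLiveOut(defBlock map[int]int, useBlocks map[int][]*Block) map[int][]int {
	liveOut := make(map[int][]int)
	for vid, uses := range useBlocks {
		// Only vid is pushed during this walk, so checking the last entry
		// is enough to deduplicate -- the same trick as the CL's push closure.
		push := func(bid int) {
			l := liveOut[bid]
			if len(l) == 0 || l[len(l)-1] != vid {
				liveOut[bid] = append(l, vid)
			}
		}
		seen := make(map[int]bool)
		queue := append([]*Block(nil), uses...) // blocks where vid is live-in
		for len(queue) > 0 {
			work := queue[len(queue)-1]
			queue = queue[:len(queue)-1]
			if seen[work.ID] || work.ID == defBlock[vid] {
				continue // stop at the defining block: vid is born there
			}
			seen[work.ID] = true
			for _, p := range work.Preds {
				push(p.ID) // vid flows out of every predecessor on the path
				queue = append(queue, p)
			}
		}
	}
	return liveOut
}

func main() {
	// b0 -> b1 -> b2: value 7 is defined in b0 and used in b2,
	// so it is live out of b0 and b1 but not out of b2.
	b0 := &Block{ID: 0}
	b1 := &Block{ID: 1, Preds: []*Block{b0}}
	b2 := &Block{ID: 2, Preds: []*Block{b1}}
	fmt.Println(computeLiveOut(map[int]int{7: 0}, map[int][]*Block{7: {b2}}))
	// Output: map[0:[7] 1:[7]]
}
```

The cost is proportional to the paths each slot-needing value is live along, rather than to (number of blocks) x (number of values) x (iterations to fixpoint), which is why it pays off when such values are sparse.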
@@ -56,10 +56,35 @@ func putStackAllocState(s *stackAllocState) {
 }
 
 type stackValState struct {
 	typ      *types.Type
 	spill    *Value
 	needSlot bool
 	isArg    bool
+	defBlock  ID
+	useBlocks []stackUseBlock
 }
 
+// addUseBlock adds a block to the set of blocks that uses this value.
+// Note that we only loosely enforce the set property by checking the last
+// block that was appended to the list, so duplicates may occur.
+// Because we add values block by block (barring phi nodes), the number of duplicates is
+// small and we deduplicate as part of the liveness algorithm later anyway.
+func (sv *stackValState) addUseBlock(b *Block, liveout bool) {
+	entry := stackUseBlock{
+		b:       b,
+		liveout: liveout,
+	}
+	if sv.useBlocks == nil || sv.useBlocks[len(sv.useBlocks)-1] != entry {
+		sv.useBlocks = append(sv.useBlocks, stackUseBlock{
+			b:       b,
+			liveout: liveout,
+		})
+	}
+}
+
+type stackUseBlock struct {
+	b       *Block
+	liveout bool
+}
+
 // stackalloc allocates storage in the stack frame for
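The "loose set property" in addUseBlock's comment works because uses are recorded block by block, so repeats of the same block arrive adjacently. A tiny hedged illustration of that invariant, with plain ints standing in for blocks:

```go
package main

import "fmt"

// addUse mimics addUseBlock's loose dedup (ints stand in for *Block):
// only the most recent entry is checked, so only adjacent duplicates
// are suppressed.
func addUse(uses []int, b int) []int {
	if len(uses) == 0 || uses[len(uses)-1] != b {
		uses = append(uses, b)
	}
	return uses
}

func main() {
	var uses []int
	// Uses are visited block by block, so repeats of the same block
	// arrive back to back and the last-entry check catches them...
	for _, b := range []int{1, 1, 1, 2, 2, 3} {
		uses = addUse(uses, b)
	}
	fmt.Println(uses) // [1 2 3]
	// ...but a non-adjacent repeat (e.g. a phi input referring back to
	// block 1) slips through; the seen set in the liveness walk below
	// deduplicates those later.
	fmt.Println(addUse(uses, 1)) // [1 2 3 1]
}
```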
@@ -99,6 +124,7 @@ func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
 		s.values[v.ID].typ = v.Type
 		s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() && !v.OnWasmStack
 		s.values[v.ID].isArg = hasAnyArgOp(v)
+		s.values[v.ID].defBlock = b.ID
 		if f.pass.debug > stackDebug && s.values[v.ID].needSlot {
 			fmt.Printf("%s needs a stack slot\n", v)
 		}
@ -291,80 +317,89 @@ func (s *stackAllocState) stackalloc() {
|
||||||
|
|
||||||
// computeLive computes a map from block ID to a list of
|
// computeLive computes a map from block ID to a list of
|
||||||
// stack-slot-needing value IDs live at the end of that block.
|
// stack-slot-needing value IDs live at the end of that block.
|
||||||
// TODO: this could be quadratic if lots of variables are live across lots of
|
|
||||||
// basic blocks. Figure out a way to make this function (or, more precisely, the user
|
|
||||||
// of this function) require only linear size & time.
|
|
||||||
func (s *stackAllocState) computeLive(spillLive [][]ID) {
|
func (s *stackAllocState) computeLive(spillLive [][]ID) {
|
||||||
s.live = make([][]ID, s.f.NumBlocks())
|
|
||||||
var phis []*Value
|
|
||||||
live := s.f.newSparseSet(s.f.NumValues())
|
|
||||||
defer s.f.retSparseSet(live)
|
|
||||||
t := s.f.newSparseSet(s.f.NumValues())
|
|
||||||
defer s.f.retSparseSet(t)
|
|
||||||
|
|
||||||
// Instead of iterating over f.Blocks, iterate over their postordering.
|
// Because values using stack slots are few and far inbetween
|
||||||
// Liveness information flows backward, so starting at the end
|
// (compared to the set of all values), we use a path exploration
|
||||||
// increases the probability that we will stabilize quickly.
|
// algorithm to calculate liveness here.
|
||||||
po := s.f.postorder()
|
f := s.f
|
||||||
for {
|
for _, b := range f.Blocks {
|
||||||
changed := false
|
for _, spillvid := range spillLive[b.ID] {
|
||||||
for _, b := range po {
|
val := &s.values[spillvid]
|
||||||
// Start with known live values at the end of the block
|
val.addUseBlock(b, true)
|
||||||
live.clear()
|
|
||||||
live.addAll(s.live[b.ID])
|
|
||||||
|
|
||||||
// Propagate backwards to the start of the block
|
|
||||||
phis = phis[:0]
|
|
||||||
for i := len(b.Values) - 1; i >= 0; i-- {
|
|
||||||
v := b.Values[i]
|
|
||||||
live.remove(v.ID)
|
|
||||||
if v.Op == OpPhi {
|
|
||||||
// Save phi for later.
|
|
||||||
// Note: its args might need a stack slot even though
|
|
||||||
// the phi itself doesn't. So don't use needSlot.
|
|
||||||
if !v.Type.IsMemory() && !v.Type.IsVoid() {
|
|
||||||
phis = append(phis, v)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, a := range v.Args {
|
|
||||||
if s.values[a.ID].needSlot {
|
|
||||||
live.add(a.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// for each predecessor of b, expand its list of live-at-end values
|
|
||||||
// invariant: s contains the values live at the start of b (excluding phi inputs)
|
|
||||||
for i, e := range b.Preds {
|
|
||||||
p := e.b
|
|
||||||
t.clear()
|
|
||||||
t.addAll(s.live[p.ID])
|
|
||||||
t.addAll(live.contents())
|
|
||||||
t.addAll(spillLive[p.ID])
|
|
||||||
for _, v := range phis {
|
|
||||||
a := v.Args[i]
|
|
||||||
if s.values[a.ID].needSlot {
|
|
||||||
t.add(a.ID)
|
|
||||||
}
|
|
||||||
if spill := s.values[a.ID].spill; spill != nil {
|
|
||||||
//TODO: remove? Subsumed by SpillUse?
|
|
||||||
t.add(spill.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if t.size() == len(s.live[p.ID]) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// grow p's live set
|
|
||||||
s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...)
|
|
||||||
changed = true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
for _, v := range b.Values {
|
||||||
if !changed {
|
for i, a := range v.Args {
|
||||||
break
|
val := &s.values[a.ID]
|
||||||
|
useBlock := b
|
||||||
|
forceLiveout := false
|
||||||
|
if v.Op == OpPhi {
|
||||||
|
useBlock = b.Preds[i].b
|
||||||
|
forceLiveout = true
|
||||||
|
if spill := val.spill; spill != nil {
|
||||||
|
//TODO: remove? Subsumed by SpillUse?
|
||||||
|
s.values[spill.ID].addUseBlock(useBlock, true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !val.needSlot {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
val.addUseBlock(useBlock, forceLiveout)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s.live = make([][]ID, f.NumBlocks())
|
||||||
|
push := func(bid, vid ID) {
|
||||||
|
l := s.live[bid]
|
||||||
|
if l == nil || l[len(l)-1] != vid {
|
||||||
|
l = append(l, vid)
|
||||||
|
s.live[bid] = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO: If we can help along the interference graph by calculating livein sets,
|
||||||
|
// we can do so trivially by turning this sparse set into an array of arrays
|
||||||
|
// and checking the top for the current value instead of inclusion in the sparse set.
|
||||||
|
seen := f.newSparseSet(f.NumBlocks())
|
||||||
|
defer f.retSparseSet(seen)
|
||||||
|
// instead of pruning out duplicate blocks when we build the useblocks slices
|
||||||
|
// or when we add them to the queue, rely on the seen set to stop considering
|
||||||
|
// them. This is slightly faster than building the workqueues as sets
|
||||||
|
//
|
||||||
|
// However, this means that the queue can grow larger than the number of blocks,
|
||||||
|
// usually in very short functions. Returning a slice with values appended beyond the
|
||||||
|
// original allocation can corrupt the allocator state, so cap the queue and return
|
||||||
|
// the originally allocated slice regardless.
|
||||||
|
allocedBqueue := f.Cache.allocBlockSlice(f.NumBlocks())
|
||||||
|
defer f.Cache.freeBlockSlice(allocedBqueue)
|
||||||
|
bqueue := allocedBqueue[:0:f.NumBlocks()]
|
||||||
|
|
||||||
|
for vid, v := range s.values {
|
||||||
|
if !v.needSlot {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen.clear()
|
||||||
|
bqueue = bqueue[:0]
|
||||||
|
for _, b := range v.useBlocks {
|
||||||
|
if b.liveout {
|
||||||
|
push(b.b.ID, ID(vid))
|
||||||
|
}
|
||||||
|
bqueue = append(bqueue, b.b)
|
||||||
|
}
|
||||||
|
for len(bqueue) > 0 {
|
||||||
|
work := bqueue[len(bqueue)-1]
|
||||||
|
bqueue = bqueue[:len(bqueue)-1]
|
||||||
|
if seen.contains(work.ID) || work.ID == v.defBlock {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen.add(work.ID)
|
||||||
|
for _, e := range work.Preds {
|
||||||
|
push(e.b.ID, ID(vid))
|
||||||
|
bqueue = append(bqueue, e.b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if s.f.pass.debug > stackDebug {
|
if s.f.pass.debug > stackDebug {
|
||||||
for _, b := range s.f.Blocks {
|
for _, b := range s.f.Blocks {
|
||||||
fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
|
fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
|
||||||
|
|
|
||||||
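The queue-capping comment above hinges on a subtle point of Go slice semantics: a slice obtained from an allocator cache must not be grown in place past its original allocation before it is returned to the cache. A small hedged demonstration of the three-index slice expression that `bqueue := allocedBqueue[:0:f.NumBlocks()]` uses, with `make` standing in for the compiler's allocBlockSlice cache helper:

```go
package main

import "fmt"

func main() {
	// Stand-in for a slice handed out by an allocator cache.
	backing := make([]int, 4)

	// The three-index expression yields length 0 with capacity capped at
	// the original allocation, mirroring allocedBqueue[:0:f.NumBlocks()].
	queue := backing[:0:4]

	// Push more elements than the cap allows.
	for i := 0; i < 6; i++ {
		queue = append(queue, i)
	}

	// The fifth append exceeded the cap, so append copied to fresh memory
	// instead of growing the cached allocation in place; backing is
	// untouched and can be returned to the cache safely.
	fmt.Println(cap(queue) > 4, len(backing) == 4) // true true
}
```

Without the capacity cap, `append` would grow the cached slice in place while it still had spare capacity, and the CL would have to track which slice to hand back; capping makes returning `allocedBqueue` unconditionally correct.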