// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// markKind marks the state of a block during the postorder traversal below.
type markKind uint8

const (
	notFound    markKind = 0 // block has not been discovered yet
	notExplored markKind = 1 // discovered and in queue, outedges not processed yet
	explored    markKind = 2 // discovered and in queue, outedges processed
	done        markKind = 3 // all done, in output ordering
)

// This file contains code to compute the dominator tree
// of a control-flow graph.

// postorder computes a postorder traversal ordering for the
// basic blocks in f. Unreachable blocks will not appear.
func postorder(f *Func) []*Block {
	return postorderWithNumbering(f, []int32{})
}
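
// postorderWithNumbering computes a postorder traversal of f's blocks,
// additionally recording each block's postorder index in ponums[b.ID]
// when ponums is non-empty. Unreachable blocks do not appear.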
func postorderWithNumbering(f *Func, ponums []int32) []*Block {
	mark := make([]markKind, f.NumBlocks())

	// result ordering
	var order []*Block

	// stack of blocks
	var s []*Block
	s = append(s, f.Entry)
	mark[f.Entry.ID] = notExplored
	for len(s) > 0 {
		b := s[len(s)-1]
		switch mark[b.ID] {
		case explored:
			// Children have all been visited. Pop & output block.
			s = s[:len(s)-1]
			mark[b.ID] = done
			if len(ponums) > 0 {
				ponums[b.ID] = int32(len(order))
			}
			order = append(order, b)
		case notExplored:
			// Children have not been visited yet. Mark as explored
			// and queue any children we haven't seen yet.
			mark[b.ID] = explored
			for _, e := range b.Succs {
				c := e.b
				if mark[c.ID] == notFound {
					mark[c.ID] = notExplored
					s = append(s, c)
				}
			}
		default:
			b.Fatalf("bad stack state %v %d", b, mark[b.ID])
		}
	}
	return order
}
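
// linkedBlocks is a function that returns a block's predecessor or successor
// edges, letting the same code compute both dominator and post-dominator trees.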
type linkedBlocks func(*Block) []Edge

const nscratchslices = 7

// experimentally, functions with 512 or fewer blocks account
// for 75% of memory (size) allocation for dominator computation
// in make.bash.
const minscratchblocks = 512
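
// scratchBlocksForDom returns seven scratch slices of IDs, each of length
// maxBlockID, carved from a single allocation that is cached on cfg and
// reused by later dominator computations.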
func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g []ID) {
	tot := maxBlockID * nscratchslices
	scratch := cfg.domblockstore
	if len(scratch) < tot {
		// req = max(1.5*tot, nscratchslices*minscratchblocks)
		// 50% padding allows for graph growth in later phases.
		req := (tot * 3) >> 1
		if req < nscratchslices*minscratchblocks {
			req = nscratchslices * minscratchblocks
		}
		scratch = make([]ID, req)
		cfg.domblockstore = scratch
	} else {
		// Clear as much of scratch as we will (re)use
		scratch = scratch[0:tot]
		for i := range scratch {
			scratch[i] = 0
		}
	}

	a = scratch[0*maxBlockID : 1*maxBlockID]
	b = scratch[1*maxBlockID : 2*maxBlockID]
	c = scratch[2*maxBlockID : 3*maxBlockID]
	d = scratch[3*maxBlockID : 4*maxBlockID]
	e = scratch[4*maxBlockID : 5*maxBlockID]
	f = scratch[5*maxBlockID : 6*maxBlockID]
	g = scratch[6*maxBlockID : 7*maxBlockID]

	return
}
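
// dominators computes the dominator tree for f. It returns a slice which maps
// block ID to the immediate dominator of that block; the entry block and
// unreachable blocks map to nil.
//
// For example, given some block b in f, a pass might walk up the tree:
//
//	idom := dominators(f)
//	for d := idom[b.ID]; d != nil; d = idom[d.ID] {
//		// each d here strictly dominates b
//	}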
func dominators(f *Func) []*Block {
	preds := func(b *Block) []Edge { return b.Preds }
	succs := func(b *Block) []Edge { return b.Succs }

	//TODO: benchmark and try to find criteria for swapping between
	// dominatorsSimple and dominatorsLT
	return f.dominatorsLTOrig(f.Entry, preds, succs)
}

// dominatorsLTOrig runs Lengauer-Tarjan to compute a dominator tree starting at
// entry and using predFn/succFn to find predecessors/successors to allow
// computing both dominator and post-dominator trees.
func (f *Func) dominatorsLTOrig(entry *Block, predFn linkedBlocks, succFn linkedBlocks) []*Block {
	// Adapted directly from the original TOPLAS article's "simple" algorithm.

	maxBlockID := entry.Func.NumBlocks()
	semi, vertex, label, parent, ancestor, bucketHead, bucketLink := f.Config.scratchBlocksForDom(maxBlockID)

	// This version uses integers for most of the computation,
	// to make the work arrays smaller and pointer-free.
	// fromID translates from ID to *Block where that is needed.
	fromID := make([]*Block, maxBlockID)
	for _, v := range f.Blocks {
		fromID[v.ID] = v
	}
	idom := make([]*Block, maxBlockID)

	// Step 1. Carry out a depth first search of the problem graph. Number
	// the vertices from 1 to n as they are reached during the search.
	n := f.dfsOrig(entry, succFn, semi, vertex, label, parent)

	for i := n; i >= 2; i-- {
		w := vertex[i]

		// Step 2 in the TOPLAS paper: compute the semidominator of w.
		for _, e := range predFn(fromID[w]) {
			v := e.b
			if semi[v.ID] == 0 {
				// Skip unreachable predecessor. Not in the original algorithm,
				// but we're using the existing pred lists instead of building
				// reachable-only ones.
				continue
			}
			u := evalOrig(v.ID, ancestor, semi, label)
			if semi[u] < semi[w] {
				semi[w] = semi[u]
			}
		}

		// Add w to bucket[vertex[semi[w]]]; the buckets are linked lists
		// implemented in a pair of arrays.
		vsw := vertex[semi[w]]
		bucketLink[w] = bucketHead[vsw]
		bucketHead[vsw] = w

		linkOrig(parent[w], w, ancestor)

		// Step 3 in the TOPLAS paper: implicitly define the immediate
		// dominator of each vertex in parent[w]'s bucket.
		for v := bucketHead[parent[w]]; v != 0; v = bucketLink[v] {
			u := evalOrig(v, ancestor, semi, label)
			if semi[u] < semi[v] {
				idom[v] = fromID[u]
			} else {
				idom[v] = fromID[parent[w]]
			}
		}
	}
	// Step 4 in the TOPLAS paper: explicitly define the immediate dominator
	// of each vertex, in increasing DFS-number order.
	for i := ID(2); i <= n; i++ {
		w := vertex[i]
		if idom[w].ID != vertex[semi[w]] {
			idom[w] = idom[idom[w].ID]
		}
	}

	return idom
}

// dfsOrig performs a depth first search over the blocks starting at the entry
// block (in arbitrary order). This is a de-recursed version of dfs from the
// original Lengauer-Tarjan TOPLAS article. It's important to return the
// same values for parent as the original algorithm.
func (f *Func) dfsOrig(entry *Block, succFn linkedBlocks, semi, vertex, label, parent []ID) ID {
	n := ID(0)
	s := make([]*Block, 0, 256)
	s = append(s, entry)

	for len(s) > 0 {
		v := s[len(s)-1]
		s = s[:len(s)-1]
		// recursing on v

		if semi[v.ID] != 0 {
			continue // already visited
		}
		n++
		semi[v.ID] = n
		vertex[n] = v.ID
		label[v.ID] = v.ID
		// ancestor[v] already zero
		for _, e := range succFn(v) {
			w := e.b
			// if it has a dfnum, we've already visited it
			if semi[w.ID] == 0 {
				// yes, w can be pushed multiple times.
				s = append(s, w)
				parent[w.ID] = v.ID // keep overwriting this till it is visited.
			}
		}
	}
	return n
}

// compressOrig is the "simple" compress function from LT paper
func compressOrig(v ID, ancestor, semi, label []ID) {
	if ancestor[ancestor[v]] != 0 {
		compressOrig(ancestor[v], ancestor, semi, label)
		if semi[label[ancestor[v]]] < semi[label[v]] {
			label[v] = label[ancestor[v]]
		}
		ancestor[v] = ancestor[ancestor[v]]
	}
}

// evalOrig is the "simple" eval function from LT paper
func evalOrig(v ID, ancestor, semi, label []ID) ID {
	if ancestor[v] == 0 {
		return v
	}
	compressOrig(v, ancestor, semi, label)
	return label[v]
}
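
// linkOrig is the "simple" link function from the LT paper: it adds the edge
// (v, w) to the ancestor forest by recording v as w's ancestor.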
func linkOrig(v, w ID, ancestor []ID) {
	ancestor[w] = v
}

// dominatorsSimple computes the dominator tree for f. It returns a slice
// which maps block ID to the immediate dominator of that block.
// Unreachable blocks map to nil. The entry block maps to nil.
func dominatorsSimple(f *Func) []*Block {
	// A simple algorithm for now:
	// Cooper, Harvey, Kennedy, "A Simple, Fast Dominance Algorithm".
	idom := make([]*Block, f.NumBlocks())

	// Compute postorder walk
	post := f.postorder()

	// Make map from block id to order index (for intersect call)
	postnum := make([]int, f.NumBlocks())
	for i, b := range post {
		postnum[b.ID] = i
	}

	// Make the entry block a self-loop
	idom[f.Entry.ID] = f.Entry
	if postnum[f.Entry.ID] != len(post)-1 {
		f.Fatalf("entry block %v not last in postorder", f.Entry)
	}

	// Compute relaxation of idom entries
	for {
		changed := false

		for i := len(post) - 2; i >= 0; i-- {
			b := post[i]
			var d *Block
			for _, e := range b.Preds {
				p := e.b
				if idom[p.ID] == nil {
					continue
				}
				if d == nil {
					d = p
					continue
				}
				d = intersect(d, p, postnum, idom)
			}
			if d != idom[b.ID] {
				idom[b.ID] = d
				changed = true
			}
		}
		if !changed {
			break
		}
	}
	// Set idom of entry block to nil instead of itself.
	idom[f.Entry.ID] = nil
	return idom
}

// intersect finds the closest dominator of both b and c.
// It requires a postorder numbering of all the blocks.
func intersect(b, c *Block, postnum []int, idom []*Block) *Block {
	// TODO: This loop is O(n^2). See BenchmarkNilCheckDeep*.
	for b != c {
		if postnum[b.ID] < postnum[c.ID] {
			b = idom[b.ID]
		} else {
			c = idom[c.ID]
		}
	}
	return b
}