2015-03-27 13:41:30 -07:00
|
|
|
// Copyright 2015 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package ssa
|
|
|
|
|
|
2016-02-08 12:07:39 -05:00
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
|
"sort"
|
|
|
|
|
)
|
2015-03-27 13:41:30 -07:00
|
|
|
|
|
|
|
|
// cse does common-subexpression elimination on the Function.
|
2016-03-01 23:21:55 +00:00
|
|
|
// Values are just relinked, nothing is deleted. A subsequent deadcode
|
2015-03-27 13:41:30 -07:00
|
|
|
// pass is required to actually remove duplicate expressions.
|
|
|
|
|
func cse(f *Func) {
|
|
|
|
|
// Two values are equivalent if they satisfy the following definition:
|
|
|
|
|
// equivalent(v, w):
|
|
|
|
|
// v.op == w.op
|
|
|
|
|
// v.type == w.type
|
|
|
|
|
// v.aux == w.aux
|
2015-06-23 16:44:06 -07:00
|
|
|
// v.auxint == w.auxint
|
2015-03-27 13:41:30 -07:00
|
|
|
// len(v.args) == len(w.args)
|
2015-07-20 18:50:17 -07:00
|
|
|
// v.block == w.block if v.op == OpPhi
|
2015-03-27 13:41:30 -07:00
|
|
|
// equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
|
|
|
|
|
|
|
|
|
|
// The algorithm searches for a partition of f's values into
|
|
|
|
|
// equivalence classes using the above definition.
|
|
|
|
|
// It starts with a coarse partition and iteratively refines it
|
|
|
|
|
// until it reaches a fixed point.
|
|
|
|
|
|
2016-01-27 16:47:23 -08:00
|
|
|
// Make initial coarse partitions by using a subset of the conditions above.
|
|
|
|
|
a := make([]*Value, 0, f.NumValues())
|
2017-02-09 10:45:35 -08:00
|
|
|
if f.auxmap == nil {
|
|
|
|
|
f.auxmap = auxmap{}
|
|
|
|
|
}
|
2015-03-27 13:41:30 -07:00
|
|
|
for _, b := range f.Blocks {
|
|
|
|
|
for _, v := range b.Values {
|
2016-01-27 16:47:23 -08:00
|
|
|
if v.Type.IsMemory() {
|
|
|
|
|
continue // memory values can never cse
|
2015-07-20 18:50:17 -07:00
|
|
|
}
|
2017-02-09 10:45:35 -08:00
|
|
|
if f.auxmap[v.Aux] == 0 {
|
|
|
|
|
f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
|
|
|
|
|
}
|
2016-01-27 16:47:23 -08:00
|
|
|
a = append(a, v)
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
}
|
2017-02-09 10:45:35 -08:00
|
|
|
partition := partitionValues(a, f.auxmap)
|
2015-03-27 13:41:30 -07:00
|
|
|
|
|
|
|
|
// map from value id back to eqclass id
|
2016-01-27 16:47:23 -08:00
|
|
|
valueEqClass := make([]ID, f.NumValues())
|
|
|
|
|
for _, b := range f.Blocks {
|
|
|
|
|
for _, v := range b.Values {
|
|
|
|
|
// Use negative equivalence class #s for unique values.
|
|
|
|
|
valueEqClass[v.ID] = -v.ID
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-10-04 14:35:45 -07:00
|
|
|
var pNum ID = 1
|
|
|
|
|
for _, e := range partition {
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
if f.pass.debug > 1 && len(e) > 500 {
|
2016-02-10 19:39:32 -06:00
|
|
|
fmt.Printf("CSE.large partition (%d): ", len(e))
|
|
|
|
|
for j := 0; j < 3; j++ {
|
|
|
|
|
fmt.Printf("%s ", e[j].LongString())
|
|
|
|
|
}
|
|
|
|
|
fmt.Println()
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-27 13:41:30 -07:00
|
|
|
for _, v := range e {
|
2016-10-04 14:35:45 -07:00
|
|
|
valueEqClass[v.ID] = pNum
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
if f.pass.debug > 2 && len(e) > 1 {
|
2016-10-04 14:35:45 -07:00
|
|
|
fmt.Printf("CSE.partition #%d:", pNum)
|
2016-02-08 12:07:39 -05:00
|
|
|
for _, v := range e {
|
|
|
|
|
fmt.Printf(" %s", v.String())
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
2016-10-04 14:35:45 -07:00
|
|
|
pNum++
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
|
2016-10-04 14:35:45 -07:00
|
|
|
// Split equivalence classes at points where they have
|
|
|
|
|
// non-equivalent arguments. Repeat until we can't find any
|
|
|
|
|
// more splits.
|
|
|
|
|
var splitPoints []int
|
2016-10-26 22:05:20 -07:00
|
|
|
byArgClass := new(partitionByArgClass) // reuseable partitionByArgClass to reduce allocations
|
2015-03-27 13:41:30 -07:00
|
|
|
for {
|
|
|
|
|
changed := false
|
|
|
|
|
|
2015-07-15 14:38:19 -06:00
|
|
|
// partition can grow in the loop. By not using a range loop here,
|
|
|
|
|
// we process new additions as they arrive, avoiding O(n^2) behavior.
|
|
|
|
|
for i := 0; i < len(partition); i++ {
|
|
|
|
|
e := partition[i]
|
2016-10-04 14:35:45 -07:00
|
|
|
|
2016-11-27 10:41:37 -08:00
|
|
|
if opcodeTable[e[0].Op].commutative {
|
|
|
|
|
// Order the first two args before comparison.
|
|
|
|
|
for _, v := range e {
|
|
|
|
|
if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
|
|
|
|
|
v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-04 14:35:45 -07:00
|
|
|
// Sort by eq class of arguments.
|
2016-10-26 22:05:20 -07:00
|
|
|
byArgClass.a = e
|
|
|
|
|
byArgClass.eqClass = valueEqClass
|
|
|
|
|
sort.Sort(byArgClass)
|
2016-10-04 14:35:45 -07:00
|
|
|
|
|
|
|
|
// Find split points.
|
|
|
|
|
splitPoints = append(splitPoints[:0], 0)
|
|
|
|
|
for j := 1; j < len(e); j++ {
|
|
|
|
|
v, w := e[j-1], e[j]
|
2016-11-27 10:41:37 -08:00
|
|
|
// Note: commutative args already correctly ordered by byArgClass.
|
2016-10-04 14:35:45 -07:00
|
|
|
eqArgs := true
|
|
|
|
|
for k, a := range v.Args {
|
|
|
|
|
b := w.Args[k]
|
|
|
|
|
if valueEqClass[a.ID] != valueEqClass[b.ID] {
|
|
|
|
|
eqArgs = false
|
2016-02-06 20:56:50 -06:00
|
|
|
break
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
}
|
2016-10-04 14:35:45 -07:00
|
|
|
if !eqArgs {
|
|
|
|
|
splitPoints = append(splitPoints, j)
|
2016-02-06 20:56:50 -06:00
|
|
|
}
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
2016-10-04 14:35:45 -07:00
|
|
|
if len(splitPoints) == 1 {
|
|
|
|
|
continue // no splits, leave equivalence class alone.
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
2016-10-04 14:35:45 -07:00
|
|
|
|
|
|
|
|
// Move another equivalence class down in place of e.
|
|
|
|
|
partition[i] = partition[len(partition)-1]
|
|
|
|
|
partition = partition[:len(partition)-1]
|
|
|
|
|
i--
|
|
|
|
|
|
|
|
|
|
// Add new equivalence classes for the parts of e we found.
|
|
|
|
|
splitPoints = append(splitPoints, len(e))
|
|
|
|
|
for j := 0; j < len(splitPoints)-1; j++ {
|
|
|
|
|
f := e[splitPoints[j]:splitPoints[j+1]]
|
|
|
|
|
if len(f) == 1 {
|
|
|
|
|
// Don't add singletons.
|
|
|
|
|
valueEqClass[f[0].ID] = -f[0].ID
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for _, v := range f {
|
|
|
|
|
valueEqClass[v.ID] = pNum
|
|
|
|
|
}
|
|
|
|
|
pNum++
|
|
|
|
|
partition = append(partition, f)
|
|
|
|
|
}
|
|
|
|
|
changed = true
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !changed {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-16 13:50:18 -07:00
|
|
|
sdom := f.sdom()
|
2015-03-27 13:41:30 -07:00
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Compute substitutions we would like to do. We substitute v for w
|
2015-03-27 13:41:30 -07:00
|
|
|
// if v and w are in the same equivalence class and v dominates w.
|
|
|
|
|
rewrite := make([]*Value, f.NumValues())
|
2016-10-26 22:05:20 -07:00
|
|
|
byDom := new(partitionByDom) // reusable partitionByDom to reduce allocs
|
2015-03-27 13:41:30 -07:00
|
|
|
for _, e := range partition {
|
2016-10-26 22:05:20 -07:00
|
|
|
byDom.a = e
|
|
|
|
|
byDom.sdom = sdom
|
|
|
|
|
sort.Sort(byDom)
|
2016-04-14 19:09:57 -04:00
|
|
|
for i := 0; i < len(e)-1; i++ {
|
2016-05-26 12:16:53 -07:00
|
|
|
// e is sorted by domorder, so a maximal dominant element is first in the slice
|
2016-04-14 19:09:57 -04:00
|
|
|
v := e[i]
|
|
|
|
|
if v == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
e[i] = nil
|
2015-03-27 13:41:30 -07:00
|
|
|
// Replace all elements of e which v dominates
|
2016-04-14 19:09:57 -04:00
|
|
|
for j := i + 1; j < len(e); j++ {
|
|
|
|
|
w := e[j]
|
|
|
|
|
if w == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2016-09-16 13:50:18 -07:00
|
|
|
if sdom.isAncestorEq(v.Block, w.Block) {
|
2015-03-27 13:41:30 -07:00
|
|
|
rewrite[w.ID] = v
|
2016-04-14 19:09:57 -04:00
|
|
|
e[j] = nil
|
2015-03-27 13:41:30 -07:00
|
|
|
} else {
|
2016-05-26 12:16:53 -07:00
|
|
|
// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
|
2016-04-14 19:09:57 -04:00
|
|
|
break
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-14 11:18:39 -04:00
|
|
|
// if we rewrite a tuple generator to a new one in a different block,
|
|
|
|
|
// copy its selectors to the new generator's block, so tuple generator
|
|
|
|
|
// and selectors stay together.
|
2016-08-17 13:29:19 -04:00
|
|
|
// be careful not to copy same selectors more than once (issue 16741).
|
|
|
|
|
copiedSelects := make(map[ID][]*Value)
|
2016-06-14 11:18:39 -04:00
|
|
|
for _, b := range f.Blocks {
|
2016-08-17 13:29:19 -04:00
|
|
|
out:
|
2016-06-14 11:18:39 -04:00
|
|
|
for _, v := range b.Values {
|
2016-11-14 17:05:46 -08:00
|
|
|
// New values are created when selectors are copied to
|
|
|
|
|
// a new block. We can safely ignore those new values,
|
|
|
|
|
// since they have already been copied (issue 17918).
|
|
|
|
|
if int(v.ID) >= len(rewrite) || rewrite[v.ID] != nil {
|
2016-06-14 11:18:39 -04:00
|
|
|
continue
|
|
|
|
|
}
|
2016-07-13 16:15:54 -07:00
|
|
|
if v.Op != OpSelect0 && v.Op != OpSelect1 {
|
2016-06-14 11:18:39 -04:00
|
|
|
continue
|
|
|
|
|
}
|
2016-07-13 16:15:54 -07:00
|
|
|
if !v.Args[0].Type.IsTuple() {
|
2016-06-14 11:18:39 -04:00
|
|
|
f.Fatalf("arg of tuple selector %s is not a tuple: %s", v.String(), v.Args[0].LongString())
|
|
|
|
|
}
|
|
|
|
|
t := rewrite[v.Args[0].ID]
|
|
|
|
|
if t != nil && t.Block != b {
|
|
|
|
|
// v.Args[0] is tuple generator, CSE'd into a different block as t, v is left behind
|
2016-08-17 13:29:19 -04:00
|
|
|
for _, c := range copiedSelects[t.ID] {
|
|
|
|
|
if v.Op == c.Op {
|
|
|
|
|
// an equivalent selector is already copied
|
|
|
|
|
rewrite[v.ID] = c
|
|
|
|
|
continue out
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-06-14 11:18:39 -04:00
|
|
|
c := v.copyInto(t.Block)
|
|
|
|
|
rewrite[v.ID] = c
|
2016-08-17 13:29:19 -04:00
|
|
|
copiedSelects[t.ID] = append(copiedSelects[t.ID], c)
|
2016-06-14 11:18:39 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
rewrites := int64(0)
|
2016-02-11 15:09:43 -05:00
|
|
|
|
2015-03-27 13:41:30 -07:00
|
|
|
// Apply substitutions
|
|
|
|
|
for _, b := range f.Blocks {
|
|
|
|
|
for _, v := range b.Values {
|
|
|
|
|
for i, w := range v.Args {
|
|
|
|
|
if x := rewrite[w.ID]; x != nil {
|
|
|
|
|
v.SetArg(i, x)
|
2016-02-11 15:09:43 -05:00
|
|
|
rewrites++
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-12-11 14:59:01 -08:00
|
|
|
if v := b.Control; v != nil {
|
|
|
|
|
if x := rewrite[v.ID]; x != nil {
|
|
|
|
|
if v.Op == OpNilCheck {
|
|
|
|
|
// nilcheck pass will remove the nil checks and log
|
|
|
|
|
// them appropriately, so don't mess with them here.
|
|
|
|
|
continue
|
|
|
|
|
}
|
2016-03-15 20:45:50 -07:00
|
|
|
b.SetControl(x)
|
2015-12-11 14:59:01 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
if f.pass.stats > 0 {
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
f.LogStat("CSE REWRITES", rewrites)
|
2016-02-11 15:09:43 -05:00
|
|
|
}
|
2015-03-27 13:41:30 -07:00
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// An eqclass approximates an equivalence class. During the
|
2015-03-27 13:41:30 -07:00
|
|
|
// algorithm it may represent the union of several of the
|
|
|
|
|
// final equivalence classes.
|
|
|
|
|
type eqclass []*Value
|
|
|
|
|
|
2016-01-27 16:47:23 -08:00
|
|
|
// partitionValues partitions the values into equivalence classes
|
|
|
|
|
// based on having all the following features match:
|
|
|
|
|
// - opcode
|
|
|
|
|
// - type
|
|
|
|
|
// - auxint
|
|
|
|
|
// - aux
|
|
|
|
|
// - nargs
|
|
|
|
|
// - block # if a phi op
|
2016-02-08 12:07:39 -05:00
|
|
|
// - first two arg's opcodes and auxint
|
|
|
|
|
// - NOT first two arg's aux; that can break CSE.
|
2016-01-27 16:47:23 -08:00
|
|
|
// partitionValues returns a list of equivalence classes, each
|
2016-03-01 23:21:55 +00:00
|
|
|
// being a sorted by ID list of *Values. The eqclass slices are
|
2016-01-27 16:47:23 -08:00
|
|
|
// backed by the same storage as the input slice.
|
|
|
|
|
// Equivalence classes of size 1 are ignored.
|
2016-02-23 17:52:17 -06:00
|
|
|
func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
|
2016-02-11 15:09:43 -05:00
|
|
|
sort.Sort(sortvalues{a, auxIDs})
|
2016-01-27 16:47:23 -08:00
|
|
|
|
|
|
|
|
var partition []eqclass
|
|
|
|
|
for len(a) > 0 {
|
|
|
|
|
v := a[0]
|
|
|
|
|
j := 1
|
|
|
|
|
for ; j < len(a); j++ {
|
|
|
|
|
w := a[j]
|
2016-10-04 14:35:45 -07:00
|
|
|
if cmpVal(v, w, auxIDs) != CMPeq {
|
2016-01-27 16:47:23 -08:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if j > 1 {
|
|
|
|
|
partition = append(partition, a[:j])
|
|
|
|
|
}
|
|
|
|
|
a = a[j:]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return partition
|
|
|
|
|
}
|
2016-02-23 17:52:17 -06:00
|
|
|
func lt2Cmp(isLt bool) Cmp {
|
|
|
|
|
if isLt {
|
|
|
|
|
return CMPlt
|
|
|
|
|
}
|
|
|
|
|
return CMPgt
|
2016-01-27 16:47:23 -08:00
|
|
|
}
|
|
|
|
|
|
2016-02-23 17:52:17 -06:00
|
|
|
// auxmap maps an Aux value to a small integer ID. cse assigns each
// distinct Aux a nonzero ID, and cmpVal compares those IDs to order
// values whose Aux fields differ.
type auxmap map[interface{}]int32
|
|
|
|
|
|
2016-10-04 14:35:45 -07:00
|
|
|
func cmpVal(v, w *Value, auxIDs auxmap) Cmp {
|
2016-02-23 17:52:17 -06:00
|
|
|
// Try to order these comparison by cost (cheaper first)
|
2016-01-27 16:47:23 -08:00
|
|
|
if v.Op != w.Op {
|
2016-02-23 17:52:17 -06:00
|
|
|
return lt2Cmp(v.Op < w.Op)
|
2016-01-27 16:47:23 -08:00
|
|
|
}
|
|
|
|
|
if v.AuxInt != w.AuxInt {
|
2016-02-23 17:52:17 -06:00
|
|
|
return lt2Cmp(v.AuxInt < w.AuxInt)
|
2016-01-27 16:47:23 -08:00
|
|
|
}
|
|
|
|
|
if len(v.Args) != len(w.Args) {
|
2016-02-23 17:52:17 -06:00
|
|
|
return lt2Cmp(len(v.Args) < len(w.Args))
|
2016-01-27 16:47:23 -08:00
|
|
|
}
|
2016-02-23 17:52:17 -06:00
|
|
|
if v.Op == OpPhi && v.Block != w.Block {
|
|
|
|
|
return lt2Cmp(v.Block.ID < w.Block.ID)
|
2016-01-27 16:47:23 -08:00
|
|
|
}
|
2016-05-03 13:58:28 -07:00
|
|
|
if v.Type.IsMemory() {
|
|
|
|
|
// We will never be able to CSE two values
|
|
|
|
|
// that generate memory.
|
|
|
|
|
return lt2Cmp(v.ID < w.ID)
|
2016-04-12 17:12:26 -07:00
|
|
|
}
|
|
|
|
|
|
2016-02-23 17:52:17 -06:00
|
|
|
if tc := v.Type.Compare(w.Type); tc != CMPeq {
|
|
|
|
|
return tc
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if v.Aux != w.Aux {
|
|
|
|
|
if v.Aux == nil {
|
|
|
|
|
return CMPlt
|
2016-01-27 16:47:23 -08:00
|
|
|
}
|
2016-02-23 17:52:17 -06:00
|
|
|
if w.Aux == nil {
|
|
|
|
|
return CMPgt
|
|
|
|
|
}
|
|
|
|
|
return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
|
|
|
|
|
}
|
2016-02-06 20:56:50 -06:00
|
|
|
|
2016-02-23 17:52:17 -06:00
|
|
|
return CMPeq
|
|
|
|
|
}
|
2016-01-27 16:47:23 -08:00
|
|
|
|
2016-02-23 17:52:17 -06:00
|
|
|
// Sort values to make the initial partition.
|
|
|
|
|
type sortvalues struct {
|
|
|
|
|
a []*Value // array of values
|
|
|
|
|
auxIDs auxmap // aux -> aux ID map
|
|
|
|
|
}
|
2016-01-27 16:47:23 -08:00
|
|
|
|
2016-02-23 17:52:17 -06:00
|
|
|
func (sv sortvalues) Len() int { return len(sv.a) }
|
|
|
|
|
func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
|
|
|
|
|
func (sv sortvalues) Less(i, j int) bool {
|
|
|
|
|
v := sv.a[i]
|
|
|
|
|
w := sv.a[j]
|
2016-10-04 14:35:45 -07:00
|
|
|
if cmp := cmpVal(v, w, sv.auxIDs); cmp != CMPeq {
|
2016-02-23 17:52:17 -06:00
|
|
|
return cmp == CMPlt
|
|
|
|
|
}
|
2016-01-27 16:47:23 -08:00
|
|
|
|
|
|
|
|
// Sort by value ID last to keep the sort result deterministic.
|
|
|
|
|
return v.ID < w.ID
|
|
|
|
|
}
|
2016-04-13 08:51:46 -04:00
|
|
|
|
2016-05-26 12:16:53 -07:00
|
|
|
type partitionByDom struct {
|
2016-04-13 08:51:46 -04:00
|
|
|
a []*Value // array of values
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
sdom SparseTree
|
2016-04-13 08:51:46 -04:00
|
|
|
}
|
|
|
|
|
|
2016-05-26 12:16:53 -07:00
|
|
|
func (sv partitionByDom) Len() int { return len(sv.a) }
|
|
|
|
|
func (sv partitionByDom) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
|
|
|
|
|
func (sv partitionByDom) Less(i, j int) bool {
|
2016-04-13 08:51:46 -04:00
|
|
|
v := sv.a[i]
|
|
|
|
|
w := sv.a[j]
|
2016-05-26 12:16:53 -07:00
|
|
|
return sv.sdom.domorder(v.Block) < sv.sdom.domorder(w.Block)
|
2016-04-13 08:51:46 -04:00
|
|
|
}
|
2016-10-04 14:35:45 -07:00
|
|
|
|
|
|
|
|
type partitionByArgClass struct {
|
|
|
|
|
a []*Value // array of values
|
|
|
|
|
eqClass []ID // equivalence class IDs of values
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (sv partitionByArgClass) Len() int { return len(sv.a) }
|
|
|
|
|
func (sv partitionByArgClass) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
|
|
|
|
|
func (sv partitionByArgClass) Less(i, j int) bool {
|
|
|
|
|
v := sv.a[i]
|
|
|
|
|
w := sv.a[j]
|
|
|
|
|
for i, a := range v.Args {
|
|
|
|
|
b := w.Args[i]
|
|
|
|
|
if sv.eqClass[a.ID] < sv.eqClass[b.ID] {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if sv.eqClass[a.ID] > sv.eqClass[b.ID] {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|