2015-03-03 13:38:14 -08:00
|
|
|
// Copyright 2015 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package ssa
|
|
|
|
|
|
2015-07-30 10:28:57 -07:00
|
|
|
import (
|
2017-04-18 12:53:25 -07:00
|
|
|
"cmd/internal/objabi"
|
2016-12-07 16:02:42 -08:00
|
|
|
"cmd/internal/src"
|
2015-08-10 19:00:34 -05:00
|
|
|
"fmt"
|
2015-07-30 10:28:57 -07:00
|
|
|
"log"
|
2016-05-11 15:25:17 -04:00
|
|
|
"os"
|
2016-03-17 14:12:12 -04:00
|
|
|
"regexp"
|
2015-07-30 10:28:57 -07:00
|
|
|
"runtime"
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
"strings"
|
2015-08-10 19:00:34 -05:00
|
|
|
"time"
|
2015-07-30 10:28:57 -07:00
|
|
|
)
|
2015-03-03 13:38:14 -08:00
|
|
|
|
|
|
|
|
// Compile is the main entry point for this package.
|
|
|
|
|
// Compile modifies f so that on return:
|
|
|
|
|
// · all Values in f map to 0 or 1 assembly instructions of the target architecture
|
|
|
|
|
// · the order of f.Blocks is the order to emit the Blocks
|
|
|
|
|
// · the order of b.Values is the order to emit the Values in each Block
|
|
|
|
|
// · f has a non-nil regAlloc field
|
|
|
|
|
func Compile(f *Func) {
|
|
|
|
|
// TODO: debugging - set flags to control verbosity of compiler,
|
|
|
|
|
// which phases to dump IR before/after, etc.
|
2016-01-29 14:44:15 -05:00
|
|
|
if f.Log() {
|
|
|
|
|
f.Logf("compiling %s\n", f.Name)
|
|
|
|
|
}
|
2015-03-03 13:38:14 -08:00
|
|
|
|
|
|
|
|
// hook to print function & phase if panic happens
|
|
|
|
|
phaseName := "init"
|
|
|
|
|
defer func() {
|
|
|
|
|
if phaseName != "" {
|
2015-07-30 10:28:57 -07:00
|
|
|
err := recover()
|
|
|
|
|
stack := make([]byte, 16384)
|
|
|
|
|
n := runtime.Stack(stack, false)
|
|
|
|
|
stack = stack[:n]
|
|
|
|
|
f.Fatalf("panic during %s while compiling %s:\n\n%v\n\n%s\n", phaseName, f.Name, err, stack)
|
2015-03-03 13:38:14 -08:00
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Run all the passes
|
|
|
|
|
printFunc(f)
|
2018-06-08 00:25:12 +03:00
|
|
|
f.HTMLWriter.WriteFunc("start", "start", f)
|
2016-05-11 15:25:17 -04:00
|
|
|
if BuildDump != "" && BuildDump == f.Name {
|
|
|
|
|
f.dumpFile("build")
|
|
|
|
|
}
|
2016-03-03 22:06:57 -08:00
|
|
|
if checkEnabled {
|
|
|
|
|
checkFunc(f)
|
|
|
|
|
}
|
2015-08-10 19:00:34 -05:00
|
|
|
const logMemStats = false
|
2015-03-03 13:38:14 -08:00
|
|
|
for _, p := range passes {
|
2016-02-29 10:43:18 -05:00
|
|
|
if !f.Config.optimize && !p.required || p.disabled {
|
2016-01-27 16:47:23 -08:00
|
|
|
continue
|
|
|
|
|
}
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
f.pass = &p
|
2015-03-03 13:38:14 -08:00
|
|
|
phaseName = p.name
|
2016-01-29 14:44:15 -05:00
|
|
|
if f.Log() {
|
|
|
|
|
f.Logf(" pass %s begin\n", p.name)
|
|
|
|
|
}
|
2015-08-10 12:15:52 -07:00
|
|
|
// TODO: capture logging during this pass, add it to the HTML
|
2015-08-10 19:00:34 -05:00
|
|
|
var mStart runtime.MemStats
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
if logMemStats || p.mem {
|
2015-08-10 19:00:34 -05:00
|
|
|
runtime.ReadMemStats(&mStart)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tStart := time.Now()
|
2015-03-03 13:38:14 -08:00
|
|
|
p.fn(f)
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
tEnd := time.Now()
|
2015-08-10 19:00:34 -05:00
|
|
|
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
// Need something less crude than "Log the whole intermediate result".
|
cmd/compile: rearrange fields between ssa.Func, ssa.Cache, and ssa.Config
This makes ssa.Func, ssa.Cache, and ssa.Config fulfill
the roles laid out for them in CL 38160.
The only non-trivial change in this CL is how cached
values and blocks get IDs. Prior to this CL, their IDs were
assigned as part of resetting the cache, and only modified
IDs were reset. This required knowing how many values and
blocks were modified, which required a tight coupling between
ssa.Func and ssa.Config. To eliminate that coupling,
we now zero values and blocks during reset,
and assign their IDs when they are used.
Since unused values and blocks have ID == 0,
we can efficiently find the last used value/block,
to avoid zeroing everything.
Bulk zeroing is efficient, but not efficient enough
to obviate the need to avoid zeroing everything every time.
As a happy side-effect, ssa.Func.Free is no longer necessary.
DebugHashMatch and friends now belong in func.go.
They have been left in place for clarity and review.
I will move them in a subsequent CL.
Passes toolstash -cmp. No compiler performance impact.
No change in 'go test cmd/compile/internal/ssa' execution time.
Change-Id: I2eb7af58da067ef6a36e815a6f386cfe8634d098
Reviewed-on: https://go-review.googlesource.com/38167
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-03-15 11:15:13 -07:00
|
|
|
if f.Log() || f.HTMLWriter != nil {
|
2016-01-30 17:37:38 -05:00
|
|
|
time := tEnd.Sub(tStart).Nanoseconds()
|
|
|
|
|
var stats string
|
|
|
|
|
if logMemStats {
|
|
|
|
|
var mEnd runtime.MemStats
|
|
|
|
|
runtime.ReadMemStats(&mEnd)
|
|
|
|
|
nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
|
|
|
|
|
nAllocs := mEnd.Mallocs - mStart.Mallocs
|
|
|
|
|
stats = fmt.Sprintf("[%d ns %d allocs %d bytes]", time, nAllocs, nBytes)
|
|
|
|
|
} else {
|
|
|
|
|
stats = fmt.Sprintf("[%d ns]", time)
|
|
|
|
|
}
|
2015-08-10 19:00:34 -05:00
|
|
|
|
2016-01-29 14:44:15 -05:00
|
|
|
f.Logf(" pass %s end %s\n", p.name, stats)
|
2016-01-30 17:37:38 -05:00
|
|
|
printFunc(f)
|
2018-06-08 00:25:12 +03:00
|
|
|
f.HTMLWriter.WriteFunc(phaseName, fmt.Sprintf("%s <span class=\"stats\">%s</span>", phaseName, stats), f)
|
2016-01-29 14:44:15 -05:00
|
|
|
}
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
if p.time || p.mem {
|
|
|
|
|
// Surround timing information w/ enough context to allow comparisons.
|
|
|
|
|
time := tEnd.Sub(tStart).Nanoseconds()
|
|
|
|
|
if p.time {
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
f.LogStat("TIME(ns)", time)
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
}
|
|
|
|
|
if p.mem {
|
|
|
|
|
var mEnd runtime.MemStats
|
|
|
|
|
runtime.ReadMemStats(&mEnd)
|
|
|
|
|
nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
|
|
|
|
|
nAllocs := mEnd.Mallocs - mStart.Mallocs
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
f.LogStat("TIME(ns):BYTES:ALLOCS", time, nBytes, nAllocs)
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
}
|
|
|
|
|
}
|
2016-05-11 15:25:17 -04:00
|
|
|
if p.dump != nil && p.dump[f.Name] {
|
|
|
|
|
// Dump function to appropriately named file
|
|
|
|
|
f.dumpFile(phaseName)
|
|
|
|
|
}
|
2016-03-03 22:06:57 -08:00
|
|
|
if checkEnabled {
|
|
|
|
|
checkFunc(f)
|
|
|
|
|
}
|
2015-03-03 13:38:14 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Squash error printing defer
|
|
|
|
|
phaseName = ""
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-11 15:25:17 -04:00
|
|
|
// TODO: should be a config field
|
|
|
|
|
var dumpFileSeq int
|
|
|
|
|
|
|
|
|
|
// dumpFile creates a file from the phase name and function name
|
|
|
|
|
// Dumping is done to files to avoid buffering huge strings before
|
|
|
|
|
// output.
|
|
|
|
|
func (f *Func) dumpFile(phaseName string) {
|
|
|
|
|
dumpFileSeq++
|
2017-12-14 13:35:59 -05:00
|
|
|
fname := fmt.Sprintf("%s_%02d__%s.dump", f.Name, dumpFileSeq, phaseName)
|
2016-05-11 15:25:17 -04:00
|
|
|
fname = strings.Replace(fname, " ", "_", -1)
|
|
|
|
|
fname = strings.Replace(fname, "/", "_", -1)
|
|
|
|
|
fname = strings.Replace(fname, ":", "_", -1)
|
|
|
|
|
|
|
|
|
|
fi, err := os.Create(fname)
|
|
|
|
|
if err != nil {
|
2017-03-16 22:42:10 -07:00
|
|
|
f.Warnl(src.NoXPos, "Unable to create after-phase dump file %s", fname)
|
2016-05-11 15:25:17 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p := stringFuncPrinter{w: fi}
|
|
|
|
|
fprintFunc(p, f)
|
|
|
|
|
fi.Close()
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-03 13:38:14 -08:00
|
|
|
type pass struct {
|
2016-01-27 16:47:23 -08:00
|
|
|
name string
|
|
|
|
|
fn func(*Func)
|
|
|
|
|
required bool
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
disabled bool
|
2016-05-11 15:25:17 -04:00
|
|
|
time bool // report time to run pass
|
|
|
|
|
mem bool // report mem stats to run pass
|
|
|
|
|
stats int // pass reports own "stats" (e.g., branches removed)
|
|
|
|
|
debug int // pass performs some debugging. =1 should be in error-testing-friendly Warnl format.
|
|
|
|
|
test int // pass-specific ad-hoc option, perhaps useful in development
|
|
|
|
|
dump map[string]bool // dump if function name matches
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (p *pass) addDump(s string) {
|
|
|
|
|
if p.dump == nil {
|
|
|
|
|
p.dump = make(map[string]bool)
|
|
|
|
|
}
|
|
|
|
|
p.dump[s] = true
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-03 22:06:57 -08:00
|
|
|
// checkEnabled, when set, runs the consistency checker between each phase.
// It is off by default.
var checkEnabled bool
|
2016-03-03 22:06:57 -08:00
|
|
|
|
2016-03-11 00:10:52 -05:00
|
|
|
// Debug output
var (
	IntrinsicsDebug   int
	IntrinsicsDisable bool

	BuildDebug int
	BuildTest  int
	BuildStats int
	BuildDump  string // name of function to dump after initial build of ssa
)
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
// PhaseOption sets the specified flag in the specified ssa phase,
|
|
|
|
|
// returning empty string if this was successful or a string explaining
|
2016-03-17 14:12:12 -04:00
|
|
|
// the error if it was not.
|
|
|
|
|
// A version of the phase name with "_" replaced by " " is also checked for a match.
|
|
|
|
|
// If the phase name begins with a '~' then the rest of the underscores-replaced-with-blanks
|
|
|
|
|
// version is used as a regular expression to match the phase name(s).
|
|
|
|
|
//
|
|
|
|
|
// Special cases that have turned out to be useful:
|
|
|
|
|
// ssa/check/on enables checking after each phase
|
|
|
|
|
// ssa/all/time enables time reporting for all phases
|
|
|
|
|
//
|
|
|
|
|
// See gc/lex.go for dissection of the option string.
|
|
|
|
|
// Example uses:
|
|
|
|
|
//
|
|
|
|
|
// GO_GCFLAGS=-d=ssa/generic_cse/time,ssa/generic_cse/stats,ssa/generic_cse/debug=3 ./make.bash
|
|
|
|
|
//
|
|
|
|
|
// BOOT_GO_GCFLAGS=-d='ssa/~^.*scc$/off' GO_GCFLAGS='-d=ssa/~^.*scc$/off' ./make.bash
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
//
|
2016-05-11 15:25:17 -04:00
|
|
|
func PhaseOption(phase, flag string, val int, valString string) string {
|
|
|
|
|
if phase == "help" {
|
|
|
|
|
lastcr := 0
|
cmd/compile: better formatting for ssa phases options doc
Change the help doc of
go tool compile -d=ssa/help
from this:
compile: GcFlag -d=ssa/<phase>/<flag>[=<value>|<function_name>]
<phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove, loopbce
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
fuse, dse, writebarrier, insert_resched_checks, tighten, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, phi_tighten, late_deadcode, critical
likelyadjust, layout, schedule, late_nilcheck, flagalloc, regalloc
loop_rotate, stackframe, trim
<flag> is one of on, off, debug, mem, time, test, stats, dump
<value> defaults to 1
<function_name> is required for "dump", specifies name of function to dump after <phase>
Except for dump, output is directed to standard out; dump appears in a file.
Phase "all" supports flags "time", "mem", and "dump".
Phases "intrinsics" supports flags "on", "off", and "debug".
Interpretation of the "debug" value depends on the phase.
Dump files are named <phase>__<function_name>_<seq>.dump.
To this:
compile: PhaseOptions usage:
go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
where:
- <phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
branchelim, fuse, dse, writebarrier, insert_resched_checks, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, tighten, phi_tighten, late_deadcode
critical, likelyadjust, layout, schedule, late_nilcheck, flagalloc
regalloc, loop_rotate, stackframe, trim
- <flag> is one of:
on, off, debug, mem, time, test, stats, dump
- <value> defaults to 1
- <function_name> is required for the "dump" flag, and specifies the
name of function to dump after <phase>
Phase "all" supports flags "time", "mem", and "dump".
Phase "intrinsics" supports flags "on", "off", and "debug".
If the "dump" flag is specified, the output is written on a file named
<phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
Also add a few examples at the bottom.
Fixes #20349
Change-Id: I334799e951e7b27855b3ace5d2d966c4d6ec4cff
Reviewed-on: https://go-review.googlesource.com/110062
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2018-04-29 14:57:30 +02:00
|
|
|
phasenames := " check, all, build, intrinsics"
|
2016-05-11 15:25:17 -04:00
|
|
|
for _, p := range passes {
|
|
|
|
|
pn := strings.Replace(p.name, " ", "_", -1)
|
|
|
|
|
if len(pn)+len(phasenames)-lastcr > 70 {
|
cmd/compile: better formatting for ssa phases options doc
Change the help doc of
go tool compile -d=ssa/help
from this:
compile: GcFlag -d=ssa/<phase>/<flag>[=<value>|<function_name>]
<phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove, loopbce
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
fuse, dse, writebarrier, insert_resched_checks, tighten, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, phi_tighten, late_deadcode, critical
likelyadjust, layout, schedule, late_nilcheck, flagalloc, regalloc
loop_rotate, stackframe, trim
<flag> is one of on, off, debug, mem, time, test, stats, dump
<value> defaults to 1
<function_name> is required for "dump", specifies name of function to dump after <phase>
Except for dump, output is directed to standard out; dump appears in a file.
Phase "all" supports flags "time", "mem", and "dump".
Phases "intrinsics" supports flags "on", "off", and "debug".
Interpretation of the "debug" value depends on the phase.
Dump files are named <phase>__<function_name>_<seq>.dump.
To this:
compile: PhaseOptions usage:
go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
where:
- <phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
branchelim, fuse, dse, writebarrier, insert_resched_checks, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, tighten, phi_tighten, late_deadcode
critical, likelyadjust, layout, schedule, late_nilcheck, flagalloc
regalloc, loop_rotate, stackframe, trim
- <flag> is one of:
on, off, debug, mem, time, test, stats, dump
- <value> defaults to 1
- <function_name> is required for the "dump" flag, and specifies the
name of function to dump after <phase>
Phase "all" supports flags "time", "mem", and "dump".
Phase "intrinsics" supports flags "on", "off", and "debug".
If the "dump" flag is specified, the output is written on a file named
<phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
Also add a few examples at the bottom.
Fixes #20349
Change-Id: I334799e951e7b27855b3ace5d2d966c4d6ec4cff
Reviewed-on: https://go-review.googlesource.com/110062
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2018-04-29 14:57:30 +02:00
|
|
|
phasenames += "\n "
|
2016-05-11 15:25:17 -04:00
|
|
|
lastcr = len(phasenames)
|
|
|
|
|
phasenames += pn
|
|
|
|
|
} else {
|
|
|
|
|
phasenames += ", " + pn
|
|
|
|
|
}
|
|
|
|
|
}
|
cmd/compile: better formatting for ssa phases options doc
Change the help doc of
go tool compile -d=ssa/help
from this:
compile: GcFlag -d=ssa/<phase>/<flag>[=<value>|<function_name>]
<phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove, loopbce
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
fuse, dse, writebarrier, insert_resched_checks, tighten, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, phi_tighten, late_deadcode, critical
likelyadjust, layout, schedule, late_nilcheck, flagalloc, regalloc
loop_rotate, stackframe, trim
<flag> is one of on, off, debug, mem, time, test, stats, dump
<value> defaults to 1
<function_name> is required for "dump", specifies name of function to dump after <phase>
Except for dump, output is directed to standard out; dump appears in a file.
Phase "all" supports flags "time", "mem", and "dump".
Phases "intrinsics" supports flags "on", "off", and "debug".
Interpretation of the "debug" value depends on the phase.
Dump files are named <phase>__<function_name>_<seq>.dump.
To this:
compile: PhaseOptions usage:
go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
where:
- <phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
branchelim, fuse, dse, writebarrier, insert_resched_checks, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, tighten, phi_tighten, late_deadcode
critical, likelyadjust, layout, schedule, late_nilcheck, flagalloc
regalloc, loop_rotate, stackframe, trim
- <flag> is one of:
on, off, debug, mem, time, test, stats, dump
- <value> defaults to 1
- <function_name> is required for the "dump" flag, and specifies the
name of function to dump after <phase>
Phase "all" supports flags "time", "mem", and "dump".
Phase "intrinsics" supports flags "on", "off", and "debug".
If the "dump" flag is specified, the output is written on a file named
<phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
Also add a few examples at the bottom.
Fixes #20349
Change-Id: I334799e951e7b27855b3ace5d2d966c4d6ec4cff
Reviewed-on: https://go-review.googlesource.com/110062
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2018-04-29 14:57:30 +02:00
|
|
|
return `PhaseOptions usage:
|
|
|
|
|
|
|
|
|
|
go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
|
|
|
|
|
|
|
|
|
|
where:
|
|
|
|
|
|
|
|
|
|
- <phase> is one of:
|
2016-05-11 15:25:17 -04:00
|
|
|
` + phasenames + `
|
cmd/compile: better formatting for ssa phases options doc
Change the help doc of
go tool compile -d=ssa/help
from this:
compile: GcFlag -d=ssa/<phase>/<flag>[=<value>|<function_name>]
<phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove, loopbce
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
fuse, dse, writebarrier, insert_resched_checks, tighten, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, phi_tighten, late_deadcode, critical
likelyadjust, layout, schedule, late_nilcheck, flagalloc, regalloc
loop_rotate, stackframe, trim
<flag> is one of on, off, debug, mem, time, test, stats, dump
<value> defaults to 1
<function_name> is required for "dump", specifies name of function to dump after <phase>
Except for dump, output is directed to standard out; dump appears in a file.
Phase "all" supports flags "time", "mem", and "dump".
Phases "intrinsics" supports flags "on", "off", and "debug".
Interpretation of the "debug" value depends on the phase.
Dump files are named <phase>__<function_name>_<seq>.dump.
To this:
compile: PhaseOptions usage:
go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
where:
- <phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
branchelim, fuse, dse, writebarrier, insert_resched_checks, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, tighten, phi_tighten, late_deadcode
critical, likelyadjust, layout, schedule, late_nilcheck, flagalloc
regalloc, loop_rotate, stackframe, trim
- <flag> is one of:
on, off, debug, mem, time, test, stats, dump
- <value> defaults to 1
- <function_name> is required for the "dump" flag, and specifies the
name of function to dump after <phase>
Phase "all" supports flags "time", "mem", and "dump".
Phase "intrinsics" supports flags "on", "off", and "debug".
If the "dump" flag is specified, the output is written on a file named
<phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
Also add a few examples at the bottom.
Fixes #20349
Change-Id: I334799e951e7b27855b3ace5d2d966c4d6ec4cff
Reviewed-on: https://go-review.googlesource.com/110062
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2018-04-29 14:57:30 +02:00
|
|
|
|
|
|
|
|
- <flag> is one of:
|
|
|
|
|
on, off, debug, mem, time, test, stats, dump
|
|
|
|
|
|
|
|
|
|
- <value> defaults to 1
|
|
|
|
|
|
|
|
|
|
- <function_name> is required for the "dump" flag, and specifies the
|
|
|
|
|
name of function to dump after <phase>
|
|
|
|
|
|
2016-05-11 15:25:17 -04:00
|
|
|
Phase "all" supports flags "time", "mem", and "dump".
|
cmd/compile: better formatting for ssa phases options doc
Change the help doc of
go tool compile -d=ssa/help
from this:
compile: GcFlag -d=ssa/<phase>/<flag>[=<value>|<function_name>]
<phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove, loopbce
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
fuse, dse, writebarrier, insert_resched_checks, tighten, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, phi_tighten, late_deadcode, critical
likelyadjust, layout, schedule, late_nilcheck, flagalloc, regalloc
loop_rotate, stackframe, trim
<flag> is one of on, off, debug, mem, time, test, stats, dump
<value> defaults to 1
<function_name> is required for "dump", specifies name of function to dump after <phase>
Except for dump, output is directed to standard out; dump appears in a file.
Phase "all" supports flags "time", "mem", and "dump".
Phases "intrinsics" supports flags "on", "off", and "debug".
Interpretation of the "debug" value depends on the phase.
Dump files are named <phase>__<function_name>_<seq>.dump.
To this:
compile: PhaseOptions usage:
go tool compile -d=ssa/<phase>/<flag>[=<value>|<function_name>]
where:
- <phase> is one of:
check, all, build, intrinsics, early_phielim, early_copyelim
early_deadcode, short_circuit, decompose_user, opt, zero_arg_cse
opt_deadcode, generic_cse, phiopt, nilcheckelim, prove
decompose_builtin, softfloat, late_opt, generic_deadcode, check_bce
branchelim, fuse, dse, writebarrier, insert_resched_checks, lower
lowered_cse, elim_unread_autos, lowered_deadcode, checkLower
late_phielim, late_copyelim, tighten, phi_tighten, late_deadcode
critical, likelyadjust, layout, schedule, late_nilcheck, flagalloc
regalloc, loop_rotate, stackframe, trim
- <flag> is one of:
on, off, debug, mem, time, test, stats, dump
- <value> defaults to 1
- <function_name> is required for the "dump" flag, and specifies the
name of function to dump after <phase>
Phase "all" supports flags "time", "mem", and "dump".
Phase "intrinsics" supports flags "on", "off", and "debug".
If the "dump" flag is specified, the output is written on a file named
<phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
Also add a few examples at the bottom.
Fixes #20349
Change-Id: I334799e951e7b27855b3ace5d2d966c4d6ec4cff
Reviewed-on: https://go-review.googlesource.com/110062
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2018-04-29 14:57:30 +02:00
|
|
|
Phase "intrinsics" supports flags "on", "off", and "debug".
|
|
|
|
|
|
|
|
|
|
If the "dump" flag is specified, the output is written on a file named
|
|
|
|
|
<phase>__<function_name>_<seq>.dump; otherwise it is directed to stdout.
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
|
|
|
|
|
-d=ssa/check/on
|
|
|
|
|
enables checking after each phase
|
|
|
|
|
|
|
|
|
|
-d=ssa/all/time
|
|
|
|
|
enables time reporting for all phases
|
|
|
|
|
|
|
|
|
|
-d=ssa/prove/debug=2
|
|
|
|
|
sets debugging level to 2 in the prove pass
|
|
|
|
|
|
|
|
|
|
Multiple flags can be passed at once, by separating them with
|
|
|
|
|
commas. For example:
|
|
|
|
|
|
|
|
|
|
-d=ssa/check/on,ssa/all/time
|
2016-05-11 15:25:17 -04:00
|
|
|
`
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-03 22:06:57 -08:00
|
|
|
if phase == "check" && flag == "on" {
|
|
|
|
|
checkEnabled = val != 0
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
if phase == "check" && flag == "off" {
|
|
|
|
|
checkEnabled = val == 0
|
|
|
|
|
return ""
|
|
|
|
|
}
|
2016-03-17 14:12:12 -04:00
|
|
|
|
|
|
|
|
alltime := false
|
2016-05-11 15:25:17 -04:00
|
|
|
allmem := false
|
|
|
|
|
alldump := false
|
2016-03-17 14:12:12 -04:00
|
|
|
if phase == "all" {
|
|
|
|
|
if flag == "time" {
|
|
|
|
|
alltime = val != 0
|
2016-05-11 15:25:17 -04:00
|
|
|
} else if flag == "mem" {
|
|
|
|
|
allmem = val != 0
|
|
|
|
|
} else if flag == "dump" {
|
|
|
|
|
alldump = val != 0
|
|
|
|
|
if alldump {
|
|
|
|
|
BuildDump = valString
|
|
|
|
|
}
|
2016-03-17 14:12:12 -04:00
|
|
|
} else {
|
|
|
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 00:10:52 -05:00
|
|
|
if phase == "intrinsics" {
|
|
|
|
|
switch flag {
|
|
|
|
|
case "on":
|
|
|
|
|
IntrinsicsDisable = val == 0
|
|
|
|
|
case "off":
|
|
|
|
|
IntrinsicsDisable = val != 0
|
|
|
|
|
case "debug":
|
|
|
|
|
IntrinsicsDebug = val
|
|
|
|
|
default:
|
|
|
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
|
|
|
}
|
|
|
|
|
return ""
|
|
|
|
|
}
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
if phase == "build" {
|
|
|
|
|
switch flag {
|
|
|
|
|
case "debug":
|
|
|
|
|
BuildDebug = val
|
|
|
|
|
case "test":
|
|
|
|
|
BuildTest = val
|
|
|
|
|
case "stats":
|
|
|
|
|
BuildStats = val
|
2016-05-11 15:25:17 -04:00
|
|
|
case "dump":
|
|
|
|
|
BuildDump = valString
|
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-21 13:24:58 -04:00
|
|
|
default:
|
|
|
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
|
|
|
}
|
|
|
|
|
return ""
|
|
|
|
|
}
|
2016-03-11 00:10:52 -05:00
|
|
|
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
underphase := strings.Replace(phase, "_", " ", -1)
|
2016-03-17 14:12:12 -04:00
|
|
|
var re *regexp.Regexp
|
|
|
|
|
if phase[0] == '~' {
|
|
|
|
|
r, ok := regexp.Compile(underphase[1:])
|
|
|
|
|
if ok != nil {
|
|
|
|
|
return fmt.Sprintf("Error %s in regexp for phase %s, flag %s", ok.Error(), phase, flag)
|
|
|
|
|
}
|
|
|
|
|
re = r
|
|
|
|
|
}
|
|
|
|
|
matchedOne := false
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
for i, p := range passes {
|
2016-03-17 14:12:12 -04:00
|
|
|
if phase == "all" {
|
|
|
|
|
p.time = alltime
|
2016-05-11 15:25:17 -04:00
|
|
|
p.mem = allmem
|
|
|
|
|
if alldump {
|
|
|
|
|
p.addDump(valString)
|
|
|
|
|
}
|
2016-03-17 14:12:12 -04:00
|
|
|
passes[i] = p
|
|
|
|
|
matchedOne = true
|
|
|
|
|
} else if p.name == phase || p.name == underphase || re != nil && re.MatchString(p.name) {
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
switch flag {
|
|
|
|
|
case "on":
|
|
|
|
|
p.disabled = val == 0
|
|
|
|
|
case "off":
|
|
|
|
|
p.disabled = val != 0
|
|
|
|
|
case "time":
|
|
|
|
|
p.time = val != 0
|
|
|
|
|
case "mem":
|
|
|
|
|
p.mem = val != 0
|
|
|
|
|
case "debug":
|
|
|
|
|
p.debug = val
|
|
|
|
|
case "stats":
|
|
|
|
|
p.stats = val
|
|
|
|
|
case "test":
|
|
|
|
|
p.test = val
|
2016-05-11 15:25:17 -04:00
|
|
|
case "dump":
|
|
|
|
|
p.addDump(valString)
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
default:
|
|
|
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
|
|
|
}
|
|
|
|
|
if p.disabled && p.required {
|
|
|
|
|
return fmt.Sprintf("Cannot disable required SSA phase %s using -d=ssa/%s debug option", phase, phase)
|
|
|
|
|
}
|
|
|
|
|
passes[i] = p
|
2016-03-17 14:12:12 -04:00
|
|
|
matchedOne = true
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
}
|
|
|
|
|
}
|
2016-03-17 14:12:12 -04:00
|
|
|
if matchedOne {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
return fmt.Sprintf("Did not find a phase matching %s in -d=ssa/... debug option", phase)
|
2015-03-03 13:38:14 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// list of passes for the compiler
|
|
|
|
|
var passes = [...]pass{
|
2016-01-14 16:02:23 -08:00
|
|
|
// TODO: combine phielim and copyelim into a single pass?
|
cmd/compile: assign and preserve statement boundaries.
A new pass run after ssa building (before any other
optimization) identifies the "first" ssa node for each
statement. Other "noise" nodes are tagged as being never
appropriate for a statement boundary (e.g., VarKill, VarDef,
Phi).
Rewrite, deadcode, cse, and nilcheck are modified to move
the statement boundaries forward whenever possible if a
boundary-tagged ssa value is removed; never-boundary nodes
are ignored in this search (some operations involving
constants are also tagged as never-boundary and also ignored
because they are likely to be moved or removed during
optimization).
Code generation treats all nodes except those explicitly
marked as statement boundaries as "not statement" nodes,
and floats statement boundaries to the beginning of each
same-line run of instructions found within a basic block.
Line number html conversion was modified to make statement
boundary nodes a bit more obvious by prepending a "+".
The code in fuse.go that glued together the value slices
of two blocks produced a result that depended on the
former capacities (not lengths) of the two slices. This
causes differences in the 386 bootstrap, and also can
sometimes put values into an order that does a worse job
of preserving statement boundaries when values are removed.
Portions of two delve tests that had caught problems were
incorporated into ssa/debug_test.go. There are some
opportunities to do better with optimized code, but the
next-ing is not lying or overly jumpy.
Over 4 CLs, compilebench geomean measured binary size
increase of 3.5% and compile user time increase of 3.8%
(this is after optimization to reuse a sparse map instead
of creating multiple maps.)
This CL worsens the optimized-debugging experience with
Delve; we need to work with the delve team so that
they can use the is_stmt marks that we're emitting now.
The reference output changes from time to time depending
on other changes in the compiler, sometimes better,
sometimes worse.
This CL now includes a test ensuring that 99+% of the lines
in the Go command itself (a handy optimized binary) include
is_stmt markers.
Change-Id: I359c94e06843f1eb41f9da437bd614885aa9644a
Reviewed-on: https://go-review.googlesource.com/102435
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-03-23 22:46:06 -04:00
|
|
|
{name: "number lines", fn: numberLines, required: true},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "early phielim", fn: phielim},
|
|
|
|
|
{name: "early copyelim", fn: copyelim},
|
|
|
|
|
{name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
|
|
|
|
|
{name: "short circuit", fn: shortcircuit},
|
|
|
|
|
{name: "decompose user", fn: decomposeUser, required: true},
|
2016-04-11 12:22:26 -07:00
|
|
|
{name: "opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
|
|
|
|
|
{name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values
|
|
|
|
|
{name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt
|
2016-04-13 10:58:38 +02:00
|
|
|
{name: "generic cse", fn: cse},
|
2016-02-29 19:29:04 +01:00
|
|
|
{name: "phiopt", fn: phiopt},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "nilcheckelim", fn: nilcheckelim},
|
2016-02-19 12:14:42 +01:00
|
|
|
{name: "prove", fn: prove},
|
2016-03-11 01:10:35 +01:00
|
|
|
{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
|
2017-11-10 18:08:48 +01:00
|
|
|
{name: "softfloat", fn: softfloat, required: true},
|
2016-03-14 19:11:19 +01:00
|
|
|
{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
|
2018-04-11 22:47:24 +01:00
|
|
|
{name: "dead auto elim", fn: elimDeadAutosGeneric},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "generic deadcode", fn: deadcode},
|
2016-04-04 23:33:30 +02:00
|
|
|
{name: "check bce", fn: checkbce},
|
2017-08-13 22:36:47 +00:00
|
|
|
{name: "branchelim", fn: branchelim},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "fuse", fn: fuse},
|
|
|
|
|
{name: "dse", fn: dse},
|
2017-04-27 13:15:24 -07:00
|
|
|
{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
|
2016-11-10 16:03:47 -05:00
|
|
|
{name: "insert resched checks", fn: insertLoopReschedChecks,
|
2017-04-18 12:53:25 -07:00
|
|
|
disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops.
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "lower", fn: lower, required: true},
|
|
|
|
|
{name: "lowered cse", fn: cse},
|
2017-03-29 14:01:41 -04:00
|
|
|
{name: "elim unread autos", fn: elimUnreadAutos},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "lowered deadcode", fn: deadcode, required: true},
|
|
|
|
|
{name: "checkLower", fn: checkLower, required: true},
|
|
|
|
|
{name: "late phielim", fn: phielim},
|
|
|
|
|
{name: "late copyelim", fn: copyelim},
|
2018-02-20 17:16:19 -08:00
|
|
|
{name: "tighten", fn: tighten}, // move values closer to their uses
|
cmd/compile: move phi args which are constants closer to the phi
entry:
x = MOVQconst [7]
...
b1:
goto b2
b2:
v = Phi(x, y, z)
Transform that program to:
entry:
...
b1:
x = MOVQconst [7]
goto b2
b2:
v = Phi(x, y, z)
This CL moves constant-generating instructions used by a phi to the
appropriate immediate predecessor of the phi's block.
We used to put all constants in the entry block. Unfortunately, in
large functions we have lots of constants at the start of the
function, all of which are used by lots of phis throughout the
function. This leads to the constants being live through most of the
function (especially if there is an outer loop). That's an O(n^2)
problem.
Note that most of the non-phi uses of constants have already been
folded into instructions (ADDQconst, MOVQstoreconst, etc.).
This CL may be generally useful for other instances of compiler
slowness, I'll have to check. It may cause some programs to run
slower, but probably not by much, as rematerializeable values like
these constants are allocated late (not at their originally scheduled
location) anyway.
This CL is definitely a minimal change that can be considered for 1.7.
We probably want to do a better job in the tighten pass generally, not
just for phi args. Leaving that for 1.8.
Update #16407
Change-Id: If112a8883b4ef172b2f37dea13e44bda9346c342
Reviewed-on: https://go-review.googlesource.com/25046
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2016-07-18 23:06:04 -07:00
|
|
|
{name: "phi tighten", fn: phiTighten},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "late deadcode", fn: deadcode},
|
2016-02-27 11:54:52 -05:00
|
|
|
{name: "critical", fn: critical, required: true}, // remove critical edges
|
|
|
|
|
{name: "likelyadjust", fn: likelyadjust},
|
2016-09-13 17:01:01 -07:00
|
|
|
{name: "layout", fn: layout, required: true}, // schedule blocks
|
|
|
|
|
{name: "schedule", fn: schedule, required: true}, // schedule values
|
|
|
|
|
{name: "late nilcheck", fn: nilcheckelim2},
|
[dev.ssa] cmd/compile: enhance command line option processing for SSA
The -d compiler flag can also specify ssa phase and flag,
for example -d=ssa/generic_cse/time,ssa/generic_cse/stats
Spaces in the phase names can be specified with an
underscore. Flags currently parsed (not necessarily
recognized by the phases yet) are:
on, off, mem, time, debug, stats, and test
On, off and time are handled in the harness,
debug, stats, and test are interpreted by the phase itself.
The pass is now attached to the Func being compiled, and a
new method logStats(key, ...value) on *Func to encourage a
semi-standardized format for that output. Output fields
are separated by tabs to ease digestion by awk and
spreadsheets. For example,
if f.pass.stats > 0 {
f.logStat("CSE REWRITES", rewrites)
}
Change-Id: I16db2b5af64c50ca9a47efeb51d961147a903abc
Reviewed-on: https://go-review.googlesource.com/19885
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
2016-02-25 13:10:51 -05:00
|
|
|
{name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register
|
|
|
|
|
{name: "regalloc", fn: regalloc, required: true}, // allocate int & float registers + stack slots
|
2017-03-21 14:51:38 -07:00
|
|
|
{name: "loop rotate", fn: loopRotate},
|
2016-10-03 12:26:25 -07:00
|
|
|
{name: "stackframe", fn: stackframe, required: true},
|
|
|
|
|
{name: "trim", fn: trim}, // remove empty blocks
|
2015-03-23 17:02:11 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Double-check phase ordering constraints.
// The code below exists to document the ordering requirements
// between different phases. It does not override the passes
// list above.

// A constraint records that pass a must come before pass b.
type constraint struct {
	a string // name of the pass that must come first
	b string // name of the pass that must come after a
}
|
|
|
|
|
|
|
|
|
|
var passOrder = [...]constraint{
|
2016-11-10 16:03:47 -05:00
|
|
|
// "insert resched checks" uses mem, better to clean out stores first.
|
|
|
|
|
{"dse", "insert resched checks"},
|
|
|
|
|
// insert resched checks adds new blocks containing generic instructions
|
|
|
|
|
{"insert resched checks", "lower"},
|
|
|
|
|
{"insert resched checks", "tighten"},
|
|
|
|
|
|
|
|
|
|
// prove relies on common-subexpression elimination for maximum benefits.
|
2016-02-19 12:14:42 +01:00
|
|
|
{"generic cse", "prove"},
|
|
|
|
|
// deadcode after prove to eliminate all new dead blocks.
|
|
|
|
|
{"prove", "generic deadcode"},
|
2015-05-26 14:43:25 -07:00
|
|
|
// common-subexpression before dead-store elim, so that we recognize
|
|
|
|
|
// when two address expressions are the same.
|
|
|
|
|
{"generic cse", "dse"},
|
2015-06-03 12:31:47 -07:00
|
|
|
// cse substantially improves nilcheckelim efficacy
|
|
|
|
|
{"generic cse", "nilcheckelim"},
|
|
|
|
|
// allow deadcode to clean up after nilcheckelim
|
|
|
|
|
{"nilcheckelim", "generic deadcode"},
|
|
|
|
|
// nilcheckelim generates sequences of plain basic blocks
|
|
|
|
|
{"nilcheckelim", "fuse"},
|
2015-08-30 21:19:20 -05:00
|
|
|
// nilcheckelim relies on opt to rewrite user nil checks
|
|
|
|
|
{"opt", "nilcheckelim"},
|
[dev.ssa] cmd/compile: add SSA pass to move values closer to uses
Even this very simple, restricted initial implementation helps.
While running make.bash, it moves 84437 values
to new, closer homes.
As a concrete example:
func f_ssa(i, j int, b bool) int {
if !b {
return 0
}
return i + j
}
It cuts off one stack slot and two instructions:
Before:
"".f_ssa t=1 size=96 value=0 args=0x20 locals=0x18
0x0000 00000 (x.go:3) TEXT "".f_ssa(SB), $24-32
0x0000 00000 (x.go:3) SUBQ $24, SP
0x0004 00004 (x.go:3) FUNCDATA $0, "".gcargs·0(SB)
0x0004 00004 (x.go:3) FUNCDATA $1, "".gclocals·1(SB)
0x0004 00004 (x.go:5) MOVQ $0, AX
0x0006 00006 (x.go:3) MOVQ 32(SP), CX
0x000b 00011 (x.go:3) MOVQ 40(SP), DX
0x0010 00016 (x.go:3) LEAQ 48(SP), BX
0x0015 00021 (x.go:3) MOVB (BX), BPB
0x0018 00024 (x.go:3) MOVQ $0, SI
0x001a 00026 (x.go:3) MOVQ SI, 56(SP)
0x001f 00031 (x.go:3) TESTB BPB, BPB
0x0022 00034 (x.go:5) MOVQ AX, (SP)
0x0026 00038 (x.go:3) MOVQ CX, 8(SP)
0x002b 00043 (x.go:3) MOVQ DX, 16(SP)
0x0030 00048 (x.go:4) JEQ 74
0x0032 00050 (x.go:3) MOVQ 8(SP), AX
0x0037 00055 (x.go:3) MOVQ 16(SP), CX
0x003c 00060 (x.go:7) LEAQ (AX)(CX*1), DX
0x0040 00064 (x.go:7) MOVQ DX, 56(SP)
0x0045 00069 (x.go:3) ADDQ $24, SP
0x0049 00073 (x.go:3) RET
0x004a 00074 (x.go:5) MOVQ (SP), AX
0x004e 00078 (x.go:5) MOVQ AX, 56(SP)
0x0053 00083 (x.go:3) JMP 69
After:
"".f_ssa t=1 size=80 value=0 args=0x20 locals=0x10
0x0000 00000 (x.go:3) TEXT "".f_ssa(SB), $16-32
0x0000 00000 (x.go:3) SUBQ $16, SP
0x0004 00004 (x.go:3) FUNCDATA $0, "".gcargs·0(SB)
0x0004 00004 (x.go:3) FUNCDATA $1, "".gclocals·1(SB)
0x0004 00004 (x.go:3) MOVQ 32(SP), AX
0x0009 00009 (x.go:3) MOVQ 24(SP), CX
0x000e 00014 (x.go:3) LEAQ 40(SP), DX
0x0013 00019 (x.go:3) MOVB (DX), BL
0x0015 00021 (x.go:3) MOVQ $0, BP
0x0017 00023 (x.go:3) MOVQ BP, 48(SP)
0x001c 00028 (x.go:3) TESTB BL, BL
0x001e 00030 (x.go:3) MOVQ AX, (SP)
0x0022 00034 (x.go:3) MOVQ CX, 8(SP)
0x0027 00039 (x.go:4) JEQ 64
0x0029 00041 (x.go:3) MOVQ 8(SP), AX
0x002e 00046 (x.go:3) MOVQ (SP), CX
0x0032 00050 (x.go:7) LEAQ (AX)(CX*1), DX
0x0036 00054 (x.go:7) MOVQ DX, 48(SP)
0x003b 00059 (x.go:3) ADDQ $16, SP
0x003f 00063 (x.go:3) RET
0x0040 00064 (x.go:5) MOVQ $0, AX
0x0042 00066 (x.go:5) MOVQ AX, 48(SP)
0x0047 00071 (x.go:3) JMP 59
Of course, the old backend is still well ahead:
"".f_ssa t=1 size=48 value=0 args=0x20 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f_ssa(SB), $0-32
0x0000 00000 (x.go:3) NOP
0x0000 00000 (x.go:3) NOP
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·a8eabfc4a4514ed6b3b0c61e9680e440(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) CMPB "".b+24(FP), $0
0x0005 00005 (x.go:4) JNE 17
0x0007 00007 (x.go:5) MOVQ $0, "".~r3+32(FP)
0x0010 00016 (x.go:5) RET
0x0011 00017 (x.go:7) MOVQ "".i+8(FP), BX
0x0016 00022 (x.go:7) MOVQ "".j+16(FP), BP
0x001b 00027 (x.go:7) ADDQ BP, BX
0x001e 00030 (x.go:7) MOVQ BX, "".~r3+32(FP)
0x0023 00035 (x.go:7) RET
Some regalloc improvements should help considerably.
Change-Id: I95bb5dd83e56afd70ae4e983f1d32dffd0c3d46a
Reviewed-on: https://go-review.googlesource.com/13142
Reviewed-by: Keith Randall <khr@golang.org>
2015-08-04 14:55:35 -07:00
|
|
|
// tighten will be most effective when as many values have been removed as possible
|
|
|
|
|
{"generic deadcode", "tighten"},
|
|
|
|
|
{"generic cse", "tighten"},
|
2016-04-04 23:33:30 +02:00
|
|
|
// checkbce needs the values removed
|
|
|
|
|
{"generic deadcode", "check bce"},
|
2016-02-08 11:00:43 -08:00
|
|
|
// don't run optimization pass until we've decomposed builtin objects
|
2016-03-11 01:10:35 +01:00
|
|
|
{"decompose builtin", "late opt"},
|
2017-11-10 18:08:48 +01:00
|
|
|
// decompose builtin is the last pass that may introduce new float ops, so run softfloat after it
|
|
|
|
|
{"decompose builtin", "softfloat"},
|
2015-03-23 17:02:11 -07:00
|
|
|
// don't layout blocks until critical edges have been removed
|
2015-05-05 16:19:12 -07:00
|
|
|
{"critical", "layout"},
|
2015-03-23 17:02:11 -07:00
|
|
|
// regalloc requires the removal of all critical edges
|
2015-05-05 16:19:12 -07:00
|
|
|
{"critical", "regalloc"},
|
2015-03-23 17:02:11 -07:00
|
|
|
// regalloc requires all the values in a block to be scheduled
|
2015-05-05 16:19:12 -07:00
|
|
|
{"schedule", "regalloc"},
|
2015-08-04 15:47:22 -07:00
|
|
|
// checkLower must run after lowering & subsequent dead code elim
|
|
|
|
|
{"lower", "checkLower"},
|
|
|
|
|
{"lowered deadcode", "checkLower"},
|
2016-09-13 17:01:01 -07:00
|
|
|
// late nilcheck needs instructions to be scheduled.
|
|
|
|
|
{"schedule", "late nilcheck"},
|
2015-12-09 15:58:18 -08:00
|
|
|
// flagalloc needs instructions to be scheduled.
|
|
|
|
|
{"schedule", "flagalloc"},
|
|
|
|
|
// regalloc needs flags to be allocated first.
|
|
|
|
|
{"flagalloc", "regalloc"},
|
2017-03-21 14:51:38 -07:00
|
|
|
// loopRotate will confuse regalloc.
|
|
|
|
|
{"regalloc", "loop rotate"},
|
2016-10-03 12:26:25 -07:00
|
|
|
// stackframe needs to know about spilled registers.
|
|
|
|
|
{"regalloc", "stackframe"},
|
2016-01-21 13:27:01 -08:00
|
|
|
// trim needs regalloc to be done first.
|
|
|
|
|
{"regalloc", "trim"},
|
2015-03-23 17:02:11 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func init() {
|
2015-05-05 16:19:12 -07:00
|
|
|
for _, c := range passOrder {
|
|
|
|
|
a, b := c.a, c.b
|
2015-03-23 17:02:11 -07:00
|
|
|
i := -1
|
|
|
|
|
j := -1
|
|
|
|
|
for k, p := range passes {
|
|
|
|
|
if p.name == a {
|
|
|
|
|
i = k
|
|
|
|
|
}
|
|
|
|
|
if p.name == b {
|
|
|
|
|
j = k
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if i < 0 {
|
|
|
|
|
log.Panicf("pass %s not found", a)
|
|
|
|
|
}
|
|
|
|
|
if j < 0 {
|
|
|
|
|
log.Panicf("pass %s not found", b)
|
|
|
|
|
}
|
|
|
|
|
if i >= j {
|
|
|
|
|
log.Panicf("passes %s and %s out of order", a, b)
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-03-03 13:38:14 -08:00
|
|
|
}
|