mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
362 lines
11 KiB
Go
362 lines
11 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package ssa
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"regexp"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Compile is the main entry point for this package.
|
|
// Compile modifies f so that on return:
|
|
// · all Values in f map to 0 or 1 assembly instructions of the target architecture
|
|
// · the order of f.Blocks is the order to emit the Blocks
|
|
// · the order of b.Values is the order to emit the Values in each Block
|
|
// · f has a non-nil regAlloc field
|
|
func Compile(f *Func) {
|
|
// TODO: debugging - set flags to control verbosity of compiler,
|
|
// which phases to dump IR before/after, etc.
|
|
if f.Log() {
|
|
f.Logf("compiling %s\n", f.Name)
|
|
}
|
|
|
|
// hook to print function & phase if panic happens
|
|
phaseName := "init"
|
|
defer func() {
|
|
if phaseName != "" {
|
|
err := recover()
|
|
stack := make([]byte, 16384)
|
|
n := runtime.Stack(stack, false)
|
|
stack = stack[:n]
|
|
f.Fatalf("panic during %s while compiling %s:\n\n%v\n\n%s\n", phaseName, f.Name, err, stack)
|
|
}
|
|
}()
|
|
|
|
// Run all the passes
|
|
printFunc(f)
|
|
f.Config.HTML.WriteFunc("start", f)
|
|
if checkEnabled {
|
|
checkFunc(f)
|
|
}
|
|
const logMemStats = false
|
|
for _, p := range passes {
|
|
if !f.Config.optimize && !p.required || p.disabled {
|
|
continue
|
|
}
|
|
f.pass = &p
|
|
phaseName = p.name
|
|
if f.Log() {
|
|
f.Logf(" pass %s begin\n", p.name)
|
|
}
|
|
// TODO: capture logging during this pass, add it to the HTML
|
|
var mStart runtime.MemStats
|
|
if logMemStats || p.mem {
|
|
runtime.ReadMemStats(&mStart)
|
|
}
|
|
|
|
tStart := time.Now()
|
|
p.fn(f)
|
|
tEnd := time.Now()
|
|
|
|
// Need something less crude than "Log the whole intermediate result".
|
|
if f.Log() || f.Config.HTML != nil {
|
|
time := tEnd.Sub(tStart).Nanoseconds()
|
|
var stats string
|
|
if logMemStats {
|
|
var mEnd runtime.MemStats
|
|
runtime.ReadMemStats(&mEnd)
|
|
nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
|
|
nAllocs := mEnd.Mallocs - mStart.Mallocs
|
|
stats = fmt.Sprintf("[%d ns %d allocs %d bytes]", time, nAllocs, nBytes)
|
|
} else {
|
|
stats = fmt.Sprintf("[%d ns]", time)
|
|
}
|
|
|
|
f.Logf(" pass %s end %s\n", p.name, stats)
|
|
printFunc(f)
|
|
f.Config.HTML.WriteFunc(fmt.Sprintf("after %s <span class=\"stats\">%s</span>", phaseName, stats), f)
|
|
}
|
|
if p.time || p.mem {
|
|
// Surround timing information w/ enough context to allow comparisons.
|
|
time := tEnd.Sub(tStart).Nanoseconds()
|
|
if p.time {
|
|
f.LogStat("TIME(ns)", time)
|
|
}
|
|
if p.mem {
|
|
var mEnd runtime.MemStats
|
|
runtime.ReadMemStats(&mEnd)
|
|
nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
|
|
nAllocs := mEnd.Mallocs - mStart.Mallocs
|
|
f.LogStat("TIME(ns):BYTES:ALLOCS", time, nBytes, nAllocs)
|
|
}
|
|
}
|
|
if checkEnabled {
|
|
checkFunc(f)
|
|
}
|
|
}
|
|
|
|
// Squash error printing defer
|
|
phaseName = ""
|
|
}
|
|
|
|
type pass struct {
|
|
name string
|
|
fn func(*Func)
|
|
required bool
|
|
disabled bool
|
|
time bool // report time to run pass
|
|
mem bool // report mem stats to run pass
|
|
stats int // pass reports own "stats" (e.g., branches removed)
|
|
debug int // pass performs some debugging. =1 should be in error-testing-friendly Warnl format.
|
|
test int // pass-specific ad-hoc option, perhaps useful in development
|
|
}
|
|
|
|
// Run consistency checker between each phase
|
|
var checkEnabled = false
|
|
|
|
// Debug output
|
|
var IntrinsicsDebug int
|
|
var IntrinsicsDisable bool
|
|
|
|
var BuildDebug int
|
|
var BuildTest int
|
|
var BuildStats int
|
|
|
|
// PhaseOption sets the specified flag in the specified ssa phase,
|
|
// returning empty string if this was successful or a string explaining
|
|
// the error if it was not.
|
|
// A version of the phase name with "_" replaced by " " is also checked for a match.
|
|
// If the phase name begins a '~' then the rest of the underscores-replaced-with-blanks
|
|
// version is used as a regular expression to match the phase name(s).
|
|
//
|
|
// Special cases that have turned out to be useful:
|
|
// ssa/check/on enables checking after each phase
|
|
// ssa/all/time enables time reporting for all phases
|
|
//
|
|
// See gc/lex.go for dissection of the option string.
|
|
// Example uses:
|
|
//
|
|
// GO_GCFLAGS=-d=ssa/generic_cse/time,ssa/generic_cse/stats,ssa/generic_cse/debug=3 ./make.bash
|
|
//
|
|
// BOOT_GO_GCFLAGS=-d='ssa/~^.*scc$/off' GO_GCFLAGS='-d=ssa/~^.*scc$/off' ./make.bash
|
|
//
|
|
func PhaseOption(phase, flag string, val int) string {
|
|
if phase == "check" && flag == "on" {
|
|
checkEnabled = val != 0
|
|
return ""
|
|
}
|
|
if phase == "check" && flag == "off" {
|
|
checkEnabled = val == 0
|
|
return ""
|
|
}
|
|
|
|
alltime := false
|
|
if phase == "all" {
|
|
if flag == "time" {
|
|
alltime = val != 0
|
|
} else {
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
}
|
|
}
|
|
|
|
if phase == "intrinsics" {
|
|
switch flag {
|
|
case "on":
|
|
IntrinsicsDisable = val == 0
|
|
case "off":
|
|
IntrinsicsDisable = val != 0
|
|
case "debug":
|
|
IntrinsicsDebug = val
|
|
default:
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
}
|
|
return ""
|
|
}
|
|
if phase == "build" {
|
|
switch flag {
|
|
case "debug":
|
|
BuildDebug = val
|
|
case "test":
|
|
BuildTest = val
|
|
case "stats":
|
|
BuildStats = val
|
|
default:
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
underphase := strings.Replace(phase, "_", " ", -1)
|
|
var re *regexp.Regexp
|
|
if phase[0] == '~' {
|
|
r, ok := regexp.Compile(underphase[1:])
|
|
if ok != nil {
|
|
return fmt.Sprintf("Error %s in regexp for phase %s, flag %s", ok.Error(), phase, flag)
|
|
}
|
|
re = r
|
|
}
|
|
matchedOne := false
|
|
for i, p := range passes {
|
|
if phase == "all" {
|
|
p.time = alltime
|
|
passes[i] = p
|
|
matchedOne = true
|
|
} else if p.name == phase || p.name == underphase || re != nil && re.MatchString(p.name) {
|
|
switch flag {
|
|
case "on":
|
|
p.disabled = val == 0
|
|
case "off":
|
|
p.disabled = val != 0
|
|
case "time":
|
|
p.time = val != 0
|
|
case "mem":
|
|
p.mem = val != 0
|
|
case "debug":
|
|
p.debug = val
|
|
case "stats":
|
|
p.stats = val
|
|
case "test":
|
|
p.test = val
|
|
default:
|
|
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
|
}
|
|
if p.disabled && p.required {
|
|
return fmt.Sprintf("Cannot disable required SSA phase %s using -d=ssa/%s debug option", phase, phase)
|
|
}
|
|
passes[i] = p
|
|
matchedOne = true
|
|
}
|
|
}
|
|
if matchedOne {
|
|
return ""
|
|
}
|
|
return fmt.Sprintf("Did not find a phase matching %s in -d=ssa/... debug option", phase)
|
|
}
|
|
|
|
// list of passes for the compiler
|
|
var passes = [...]pass{
|
|
// TODO: combine phielim and copyelim into a single pass?
|
|
{name: "early phielim", fn: phielim},
|
|
{name: "early copyelim", fn: copyelim},
|
|
{name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
|
|
{name: "short circuit", fn: shortcircuit},
|
|
{name: "decompose user", fn: decomposeUser, required: true},
|
|
{name: "opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
|
|
{name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values
|
|
{name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt
|
|
{name: "generic domtree", fn: domTree},
|
|
{name: "generic cse", fn: cse},
|
|
{name: "phiopt", fn: phiopt},
|
|
{name: "nilcheckelim", fn: nilcheckelim},
|
|
{name: "prove", fn: prove},
|
|
{name: "loopbce", fn: loopbce},
|
|
{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
|
|
{name: "dec", fn: dec, required: true},
|
|
{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
|
|
{name: "generic deadcode", fn: deadcode},
|
|
{name: "check bce", fn: checkbce},
|
|
{name: "fuse", fn: fuse},
|
|
{name: "dse", fn: dse},
|
|
{name: "tighten", fn: tighten}, // move values closer to their uses
|
|
{name: "lower", fn: lower, required: true},
|
|
{name: "lowered cse", fn: cse},
|
|
{name: "lowered deadcode", fn: deadcode, required: true},
|
|
{name: "checkLower", fn: checkLower, required: true},
|
|
{name: "late phielim", fn: phielim},
|
|
{name: "late copyelim", fn: copyelim},
|
|
{name: "late deadcode", fn: deadcode},
|
|
{name: "critical", fn: critical, required: true}, // remove critical edges
|
|
{name: "likelyadjust", fn: likelyadjust},
|
|
{name: "layout", fn: layout, required: true}, // schedule blocks
|
|
{name: "schedule", fn: schedule, required: true}, // schedule values
|
|
{name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register
|
|
{name: "regalloc", fn: regalloc, required: true}, // allocate int & float registers + stack slots
|
|
{name: "trim", fn: trim}, // remove empty blocks
|
|
}
|
|
|
|
// Double-check phase ordering constraints.
|
|
// This code is intended to document the ordering requirements
|
|
// between different phases. It does not override the passes
|
|
// list above.
|
|
type constraint struct {
|
|
a, b string // a must come before b
|
|
}
|
|
|
|
var passOrder = [...]constraint{
|
|
// prove reliese on common-subexpression elimination for maximum benefits.
|
|
{"generic cse", "prove"},
|
|
// deadcode after prove to eliminate all new dead blocks.
|
|
{"prove", "generic deadcode"},
|
|
// common-subexpression before dead-store elim, so that we recognize
|
|
// when two address expressions are the same.
|
|
{"generic cse", "dse"},
|
|
// cse substantially improves nilcheckelim efficacy
|
|
{"generic cse", "nilcheckelim"},
|
|
// allow deadcode to clean up after nilcheckelim
|
|
{"nilcheckelim", "generic deadcode"},
|
|
// nilcheckelim generates sequences of plain basic blocks
|
|
{"nilcheckelim", "fuse"},
|
|
// nilcheckelim relies on opt to rewrite user nil checks
|
|
{"opt", "nilcheckelim"},
|
|
// tighten should happen before lowering to avoid splitting naturally paired instructions such as CMP/SET
|
|
{"tighten", "lower"},
|
|
// cse, phiopt, nilcheckelim, prove and loopbce share idom.
|
|
{"generic domtree", "generic cse"},
|
|
{"generic domtree", "phiopt"},
|
|
{"generic domtree", "nilcheckelim"},
|
|
{"generic domtree", "prove"},
|
|
{"generic domtree", "loopbce"},
|
|
// tighten will be most effective when as many values have been removed as possible
|
|
{"generic deadcode", "tighten"},
|
|
{"generic cse", "tighten"},
|
|
// checkbce needs the values removed
|
|
{"generic deadcode", "check bce"},
|
|
// don't run optimization pass until we've decomposed builtin objects
|
|
{"decompose builtin", "late opt"},
|
|
// don't layout blocks until critical edges have been removed
|
|
{"critical", "layout"},
|
|
// regalloc requires the removal of all critical edges
|
|
{"critical", "regalloc"},
|
|
// regalloc requires all the values in a block to be scheduled
|
|
{"schedule", "regalloc"},
|
|
// checkLower must run after lowering & subsequent dead code elim
|
|
{"lower", "checkLower"},
|
|
{"lowered deadcode", "checkLower"},
|
|
// flagalloc needs instructions to be scheduled.
|
|
{"schedule", "flagalloc"},
|
|
// regalloc needs flags to be allocated first.
|
|
{"flagalloc", "regalloc"},
|
|
// trim needs regalloc to be done first.
|
|
{"regalloc", "trim"},
|
|
}
|
|
|
|
func init() {
|
|
for _, c := range passOrder {
|
|
a, b := c.a, c.b
|
|
i := -1
|
|
j := -1
|
|
for k, p := range passes {
|
|
if p.name == a {
|
|
i = k
|
|
}
|
|
if p.name == b {
|
|
j = k
|
|
}
|
|
}
|
|
if i < 0 {
|
|
log.Panicf("pass %s not found", a)
|
|
}
|
|
if j < 0 {
|
|
log.Panicf("pass %s not found", b)
|
|
}
|
|
if i >= j {
|
|
log.Panicf("passes %s and %s out of order", a, b)
|
|
}
|
|
}
|
|
}
|