go/src/cmd/compile/internal/ssa/cse.go
David Chase c2c1822b12 cmd/compile: assign and preserve statement boundaries.
A new pass run after ssa building (before any other
optimization) identifies the "first" ssa node for each
statement. Other "noise" nodes are tagged as being never
appropriate for a statement boundary (e.g., VarKill, VarDef,
Phi).

Rewrite, deadcode, cse, and nilcheck are modified to move
the statement boundaries forward whenever possible if a
boundary-tagged ssa value is removed; never-boundary nodes
are ignored in this search (some operations involving
constants are also tagged as never-boundary and also ignored
because they are likely to be moved or removed during
optimization).

Code generation treats all nodes except those explicitly
marked as statement boundaries as "not statement" nodes,
and floats statement boundaries to the beginning of each
same-line run of instructions found within a basic block.

Line number html conversion was modified to make statement
boundary nodes a bit more obvious by prepending a "+".

The code in fuse.go that glued together the value slices
of two blocks produced a result that depended on the
former capacities (not lengths) of the two slices.  This
causes differences in the 386 bootstrap, and also can
sometimes put values into an order that does a worse job
of preserving statement boundaries when values are removed.

Portions of two delve tests that had caught problems were
incorporated into ssa/debug_test.go.  There are some
opportunities to do better with optimized code, but the
next-ing is not lying or overly jumpy.

Over 4 CLs, compilebench geomean measured binary size
increase of 3.5% and compile user time increase of 3.8%
(this is after optimization to reuse a sparse map instead
of creating multiple maps.)

This CL worsens the optimized-debugging experience with
Delve; we need to work with the delve team so that
they can use the is_stmt marks that we're emitting now.

The reference output changes from time to time depending
on other changes in the compiler, sometimes better,
sometimes worse.

This CL now includes a test ensuring that 99+% of the lines
in the Go command itself (a handy optimized binary) include
is_stmt markers.

Change-Id: I359c94e06843f1eb41f9da437bd614885aa9644a
Reviewed-on: https://go-review.googlesource.com/102435
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-05-14 14:09:49 +00:00

409 lines
11 KiB
Go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/src"
"fmt"
"sort"
)
// cse does common-subexpression elimination on the Function.
// Values are just relinked, nothing is deleted. A subsequent deadcode
// pass is required to actually remove duplicate expressions.
func cse(f *Func) {
// Two values are equivalent if they satisfy the following definition:
// equivalent(v, w):
// v.op == w.op
// v.type == w.type
// v.aux == w.aux
// v.auxint == w.auxint
// len(v.args) == len(w.args)
// v.block == w.block if v.op == OpPhi
// equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
// The algorithm searches for a partition of f's values into
// equivalence classes using the above definition.
// It starts with a coarse partition and iteratively refines it
// until it reaches a fixed point.
// Make initial coarse partitions by using a subset of the conditions above.
a := make([]*Value, 0, f.NumValues())
if f.auxmap == nil {
f.auxmap = auxmap{}
}
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Type.IsMemory() {
continue // memory values can never cse
}
if f.auxmap[v.Aux] == 0 {
f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
}
a = append(a, v)
}
}
partition := partitionValues(a, f.auxmap)
// map from value id back to eqclass id
valueEqClass := make([]ID, f.NumValues())
for _, b := range f.Blocks {
for _, v := range b.Values {
// Use negative equivalence class #s for unique values.
valueEqClass[v.ID] = -v.ID
}
}
var pNum ID = 1
for _, e := range partition {
if f.pass.debug > 1 && len(e) > 500 {
fmt.Printf("CSE.large partition (%d): ", len(e))
for j := 0; j < 3; j++ {
fmt.Printf("%s ", e[j].LongString())
}
fmt.Println()
}
for _, v := range e {
valueEqClass[v.ID] = pNum
}
if f.pass.debug > 2 && len(e) > 1 {
fmt.Printf("CSE.partition #%d:", pNum)
for _, v := range e {
fmt.Printf(" %s", v.String())
}
fmt.Printf("\n")
}
pNum++
}
// Split equivalence classes at points where they have
// non-equivalent arguments. Repeat until we can't find any
// more splits.
var splitPoints []int
byArgClass := new(partitionByArgClass) // reuseable partitionByArgClass to reduce allocations
for {
changed := false
// partition can grow in the loop. By not using a range loop here,
// we process new additions as they arrive, avoiding O(n^2) behavior.
for i := 0; i < len(partition); i++ {
e := partition[i]
if opcodeTable[e[0].Op].commutative {
// Order the first two args before comparison.
for _, v := range e {
if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
}
}
}
// Sort by eq class of arguments.
byArgClass.a = e
byArgClass.eqClass = valueEqClass
sort.Sort(byArgClass)
// Find split points.
splitPoints = append(splitPoints[:0], 0)
for j := 1; j < len(e); j++ {
v, w := e[j-1], e[j]
// Note: commutative args already correctly ordered by byArgClass.
eqArgs := true
for k, a := range v.Args {
b := w.Args[k]
if valueEqClass[a.ID] != valueEqClass[b.ID] {
eqArgs = false
break
}
}
if !eqArgs {
splitPoints = append(splitPoints, j)
}
}
if len(splitPoints) == 1 {
continue // no splits, leave equivalence class alone.
}
// Move another equivalence class down in place of e.
partition[i] = partition[len(partition)-1]
partition = partition[:len(partition)-1]
i--
// Add new equivalence classes for the parts of e we found.
splitPoints = append(splitPoints, len(e))
for j := 0; j < len(splitPoints)-1; j++ {
f := e[splitPoints[j]:splitPoints[j+1]]
if len(f) == 1 {
// Don't add singletons.
valueEqClass[f[0].ID] = -f[0].ID
continue
}
for _, v := range f {
valueEqClass[v.ID] = pNum
}
pNum++
partition = append(partition, f)
}
changed = true
}
if !changed {
break
}
}
sdom := f.sdom()
// Compute substitutions we would like to do. We substitute v for w
// if v and w are in the same equivalence class and v dominates w.
rewrite := make([]*Value, f.NumValues())
byDom := new(partitionByDom) // reusable partitionByDom to reduce allocs
for _, e := range partition {
byDom.a = e
byDom.sdom = sdom
sort.Sort(byDom)
for i := 0; i < len(e)-1; i++ {
// e is sorted by domorder, so a maximal dominant element is first in the slice
v := e[i]
if v == nil {
continue
}
e[i] = nil
// Replace all elements of e which v dominates
for j := i + 1; j < len(e); j++ {
w := e[j]
if w == nil {
continue
}
if sdom.isAncestorEq(v.Block, w.Block) {
rewrite[w.ID] = v
e[j] = nil
} else {
// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
break
}
}
}
}
// if we rewrite a tuple generator to a new one in a different block,
// copy its selectors to the new generator's block, so tuple generator
// and selectors stay together.
// be careful not to copy same selectors more than once (issue 16741).
copiedSelects := make(map[ID][]*Value)
for _, b := range f.Blocks {
out:
for _, v := range b.Values {
// New values are created when selectors are copied to
// a new block. We can safely ignore those new values,
// since they have already been copied (issue 17918).
if int(v.ID) >= len(rewrite) || rewrite[v.ID] != nil {
continue
}
if v.Op != OpSelect0 && v.Op != OpSelect1 {
continue
}
if !v.Args[0].Type.IsTuple() {
f.Fatalf("arg of tuple selector %s is not a tuple: %s", v.String(), v.Args[0].LongString())
}
t := rewrite[v.Args[0].ID]
if t != nil && t.Block != b {
// v.Args[0] is tuple generator, CSE'd into a different block as t, v is left behind
for _, c := range copiedSelects[t.ID] {
if v.Op == c.Op {
// an equivalent selector is already copied
rewrite[v.ID] = c
continue out
}
}
c := v.copyInto(t.Block)
rewrite[v.ID] = c
copiedSelects[t.ID] = append(copiedSelects[t.ID], c)
}
}
}
rewrites := int64(0)
// Apply substitutions
for _, b := range f.Blocks {
for _, v := range b.Values {
for i, w := range v.Args {
if x := rewrite[w.ID]; x != nil {
if w.Pos.IsStmt() == src.PosIsStmt {
// about to lose a statement marker, w
// w is an input to v; if they're in the same block
// and the same line, v is a good-enough new statement boundary.
if w.Block == v.Block && w.Pos.Line() == v.Pos.Line() {
v.Pos = v.Pos.WithIsStmt()
w.Pos = w.Pos.WithNotStmt()
} // TODO and if this fails?
}
v.SetArg(i, x)
rewrites++
}
}
}
if v := b.Control; v != nil {
if x := rewrite[v.ID]; x != nil {
if v.Op == OpNilCheck {
// nilcheck pass will remove the nil checks and log
// them appropriately, so don't mess with them here.
continue
}
b.SetControl(x)
}
}
}
if f.pass.stats > 0 {
f.LogStat("CSE REWRITES", rewrites)
}
}
// An eqclass approximates an equivalence class. During the
// algorithm it may represent the union of several of the
// final equivalence classes.
type eqclass []*Value
// partitionValues partitions the values into equivalence classes
// based on having all the following features match:
// - opcode
// - type
// - auxint
// - aux
// - nargs
// - block # if a phi op
// - first two arg's opcodes and auxint
// - NOT first two arg's aux; that can break CSE.
// partitionValues returns a list of equivalence classes, each
// being a sorted by ID list of *Values. The eqclass slices are
// backed by the same storage as the input slice.
// Equivalence classes of size 1 are ignored.
func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
sort.Sort(sortvalues{a, auxIDs})
var partition []eqclass
for len(a) > 0 {
v := a[0]
j := 1
for ; j < len(a); j++ {
w := a[j]
if cmpVal(v, w, auxIDs) != types.CMPeq {
break
}
}
if j > 1 {
partition = append(partition, a[:j])
}
a = a[j:]
}
return partition
}
func lt2Cmp(isLt bool) types.Cmp {
if isLt {
return types.CMPlt
}
return types.CMPgt
}
type auxmap map[interface{}]int32
func cmpVal(v, w *Value, auxIDs auxmap) types.Cmp {
// Try to order these comparison by cost (cheaper first)
if v.Op != w.Op {
return lt2Cmp(v.Op < w.Op)
}
if v.AuxInt != w.AuxInt {
return lt2Cmp(v.AuxInt < w.AuxInt)
}
if len(v.Args) != len(w.Args) {
return lt2Cmp(len(v.Args) < len(w.Args))
}
if v.Op == OpPhi && v.Block != w.Block {
return lt2Cmp(v.Block.ID < w.Block.ID)
}
if v.Type.IsMemory() {
// We will never be able to CSE two values
// that generate memory.
return lt2Cmp(v.ID < w.ID)
}
// OpSelect is a pseudo-op. We need to be more aggressive
// regarding CSE to keep multiple OpSelect's of the same
// argument from existing.
if v.Op != OpSelect0 && v.Op != OpSelect1 {
if tc := v.Type.Compare(w.Type); tc != types.CMPeq {
return tc
}
}
if v.Aux != w.Aux {
if v.Aux == nil {
return types.CMPlt
}
if w.Aux == nil {
return types.CMPgt
}
return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
}
return types.CMPeq
}
// Sort values to make the initial partition.
type sortvalues struct {
a []*Value // array of values
auxIDs auxmap // aux -> aux ID map
}
func (sv sortvalues) Len() int { return len(sv.a) }
func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
func (sv sortvalues) Less(i, j int) bool {
v := sv.a[i]
w := sv.a[j]
if cmp := cmpVal(v, w, sv.auxIDs); cmp != types.CMPeq {
return cmp == types.CMPlt
}
// Sort by value ID last to keep the sort result deterministic.
return v.ID < w.ID
}
type partitionByDom struct {
a []*Value // array of values
sdom SparseTree
}
func (sv partitionByDom) Len() int { return len(sv.a) }
func (sv partitionByDom) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
func (sv partitionByDom) Less(i, j int) bool {
v := sv.a[i]
w := sv.a[j]
return sv.sdom.domorder(v.Block) < sv.sdom.domorder(w.Block)
}
type partitionByArgClass struct {
a []*Value // array of values
eqClass []ID // equivalence class IDs of values
}
func (sv partitionByArgClass) Len() int { return len(sv.a) }
func (sv partitionByArgClass) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
func (sv partitionByArgClass) Less(i, j int) bool {
v := sv.a[i]
w := sv.a[j]
for i, a := range v.Args {
b := w.Args[i]
if sv.eqClass[a.ID] < sv.eqClass[b.ID] {
return true
}
if sv.eqClass[a.ID] > sv.eqClass[b.ID] {
return false
}
}
return false
}