go/src/cmd/compile/internal/ssa/cse.go

197 lines
5.6 KiB
Go
Raw Normal View History

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "sort"
// cse does common-subexpression elimination on the Function.
// Values are just relinked, nothing is deleted. A subsequent deadcode
// pass is required to actually remove duplicate expressions.
func cse(f *Func) {
// Two values are equivalent if they satisfy the following definition:
// equivalent(v, w):
// v.op == w.op
// v.type == w.type
// v.aux == w.aux
// v.auxint == w.auxint
// len(v.args) == len(w.args)
[dev.ssa] cmd/compile: don't combine phi vars from different blocks in CSE Here is a concrete case in which this goes wrong. func f_ssa() int { var n int Next: for j := 0; j < 3; j++ { for i := 0; i < 10; i++ { if i == 6 { continue Next } n = i } n += j + j + j + j + j + j + j + j + j + j // j * 10 } return n } What follows is the function printout before and after CSE. Note blocks b8 and b10 in the before case. b8 is the inner loop's condition: i < 10. b10 is the inner loop's increment: i++. v82 is i. On entry to b8, it is either 0 (v19) the first time, or the result of incrementing v82, by way of v29. The CSE pass considered v82 and v49 to be common subexpressions, and eliminated v82 in favor of v49. In the after case, v82 is now dead and will shortly be eliminated. As a result, v29 is also dead, and we have lost the increment. The loop runs forever. BEFORE CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v15 v44 v68 = Phi <int> v14 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v82 v82 = Phi <int> v19 v29 v22 = Less <bool> v82 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v82 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v82 v28 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 AFTER CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v19 v44 v68 = Phi <int> v19 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v49 v82 = Phi <int> v19 v29 v22 = Less <bool> v49 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v49 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v49 v43 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 Change-Id: I16fc4ec527ec63f24f7d0d79d1a4a59bf37269de Reviewed-on: https://go-review.googlesource.com/12444 Reviewed-by: Keith Randall <khr@golang.org>
2015-07-20 18:50:17 -07:00
// v.block == w.block if v.op == OpPhi
// equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
// The algorithm searches for a partition of f's values into
// equivalence classes using the above definition.
// It starts with a coarse partition and iteratively refines it
// until it reaches a fixed point.
// Make initial partition based on opcode, type-name, aux, auxint, nargs, phi-block, and the ops of v's first args
type key struct {
op Op
typ string
aux interface{}
auxint int64
nargs int
[dev.ssa] cmd/compile: don't combine phi vars from different blocks in CSE Here is a concrete case in which this goes wrong. func f_ssa() int { var n int Next: for j := 0; j < 3; j++ { for i := 0; i < 10; i++ { if i == 6 { continue Next } n = i } n += j + j + j + j + j + j + j + j + j + j // j * 10 } return n } What follows is the function printout before and after CSE. Note blocks b8 and b10 in the before case. b8 is the inner loop's condition: i < 10. b10 is the inner loop's increment: i++. v82 is i. On entry to b8, it is either 0 (v19) the first time, or the result of incrementing v82, by way of v29. The CSE pass considered v82 and v49 to be common subexpressions, and eliminated v82 in favor of v49. In the after case, v82 is now dead and will shortly be eliminated. As a result, v29 is also dead, and we have lost the increment. The loop runs forever. BEFORE CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v15 v44 v68 = Phi <int> v14 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v82 v82 = Phi <int> v19 v29 v22 = Less <bool> v82 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v82 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v82 v28 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 AFTER CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v19 v44 v68 = Phi <int> v19 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v49 v82 = Phi <int> v19 v29 v22 = Less <bool> v49 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v49 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v49 v43 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 Change-Id: I16fc4ec527ec63f24f7d0d79d1a4a59bf37269de Reviewed-on: https://go-review.googlesource.com/12444 Reviewed-by: Keith Randall <khr@golang.org>
2015-07-20 18:50:17 -07:00
block ID // block id for phi vars, -1 otherwise
arg0op Op // v.Args[0].Op if len(v.Args) > 0, OpInvalid otherwise
arg1op Op // v.Args[1].Op if len(v.Args) > 1, OpInvalid otherwise
}
m := map[key]eqclass{}
for _, b := range f.Blocks {
for _, v := range b.Values {
[dev.ssa] cmd/compile: don't combine phi vars from different blocks in CSE Here is a concrete case in which this goes wrong. func f_ssa() int { var n int Next: for j := 0; j < 3; j++ { for i := 0; i < 10; i++ { if i == 6 { continue Next } n = i } n += j + j + j + j + j + j + j + j + j + j // j * 10 } return n } What follows is the function printout before and after CSE. Note blocks b8 and b10 in the before case. b8 is the inner loop's condition: i < 10. b10 is the inner loop's increment: i++. v82 is i. On entry to b8, it is either 0 (v19) the first time, or the result of incrementing v82, by way of v29. The CSE pass considered v82 and v49 to be common subexpressions, and eliminated v82 in favor of v49. In the after case, v82 is now dead and will shortly be eliminated. As a result, v29 is also dead, and we have lost the increment. The loop runs forever. BEFORE CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v15 v44 v68 = Phi <int> v14 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v82 v82 = Phi <int> v19 v29 v22 = Less <bool> v82 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v82 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v82 v28 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 AFTER CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v19 v44 v68 = Phi <int> v19 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v49 v82 = Phi <int> v19 v29 v22 = Less <bool> v49 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v49 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v49 v43 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 Change-Id: I16fc4ec527ec63f24f7d0d79d1a4a59bf37269de Reviewed-on: https://go-review.googlesource.com/12444 Reviewed-by: Keith Randall <khr@golang.org>
2015-07-20 18:50:17 -07:00
bid := ID(-1)
if v.Op == OpPhi {
bid = b.ID
}
arg0op := OpInvalid
if len(v.Args) > 0 {
arg0op = v.Args[0].Op
}
arg1op := OpInvalid
if len(v.Args) > 1 {
arg1op = v.Args[1].Op
}
// This assumes that floats are stored in AuxInt
// instead of Aux. If not, then we need to use the
// float bits as part of the key, otherwise since 0.0 == -0.0
// this would incorrectly treat 0.0 and -0.0 as identical values
k := key{v.Op, v.Type.String(), v.Aux, v.AuxInt, len(v.Args), bid, arg0op, arg1op}
m[k] = append(m[k], v)
}
}
// A partition is a set of disjoint eqclasses.
var partition []eqclass
for _, v := range m {
partition = append(partition, v)
}
[dev.ssa] cmd/compile: don't combine phi vars from different blocks in CSE Here is a concrete case in which this goes wrong. func f_ssa() int { var n int Next: for j := 0; j < 3; j++ { for i := 0; i < 10; i++ { if i == 6 { continue Next } n = i } n += j + j + j + j + j + j + j + j + j + j // j * 10 } return n } What follows is the function printout before and after CSE. Note blocks b8 and b10 in the before case. b8 is the inner loop's condition: i < 10. b10 is the inner loop's increment: i++. v82 is i. On entry to b8, it is either 0 (v19) the first time, or the result of incrementing v82, by way of v29. The CSE pass considered v82 and v49 to be common subexpressions, and eliminated v82 in favor of v49. In the after case, v82 is now dead and will shortly be eliminated. As a result, v29 is also dead, and we have lost the increment. The loop runs forever. BEFORE CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v15 v44 v68 = Phi <int> v14 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v82 v82 = Phi <int> v19 v29 v22 = Less <bool> v82 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v82 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v82 v28 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 AFTER CSE f_ssa <nil> b1: v1 = Arg <mem> v2 = SP <uint64> v4 = Addr <*int> {~r0} v2 v13 = Zero <mem> [8] v4 v1 v14 = Const <int> v15 = Const <int> v17 = Const <int> [3] v19 = Const <int> v21 = Const <int> [10] v24 = Const <int> [6] v28 = Const <int> [1] v43 = Const <int> [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi <int> v19 v44 v68 = Phi <int> v19 v67 v81 = Phi <mem> v13 v81 v18 = Less <bool> v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi <int> v66 v41 v44 = Add <int> v49 v43 Plain -> b4 b7: <- b4 v47 = Store <mem> v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi <int> v68 v49 v82 = Phi <int> v19 v29 v22 = Less <bool> v49 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq <bool> v49 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add <int> v49 v43 Plain -> b8 b11: <- b8 v32 = Add <int> v49 v49 v33 = Add <int> v32 v49 v34 = Add <int> v33 v49 v35 = Add <int> v34 v49 v36 = Add <int> v35 v49 v37 = Add <int> v36 v49 v38 = Add <int> v37 v49 v39 = Add <int> v38 v49 v40 = Add <int> v39 v49 v41 = Add <int> v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 Change-Id: I16fc4ec527ec63f24f7d0d79d1a4a59bf37269de Reviewed-on: https://go-review.googlesource.com/12444 Reviewed-by: Keith Randall <khr@golang.org>
2015-07-20 18:50:17 -07:00
// TODO: Sort partition here for perfect reproducibility?
// Sort by what? Partition size?
// (Could that improve efficiency by discovering splits earlier?)
// map from value id back to eqclass id
valueEqClass := make([]int, f.NumValues())
for i, e := range partition {
for _, v := range e {
valueEqClass[v.ID] = i
}
}
// Find an equivalence class where some members of the class have
// non-equivalent arguments. Split the equivalence class appropriately.
// Repeat until we can't find any more splits.
for {
changed := false
// partition can grow in the loop. By not using a range loop here,
// we process new additions as they arrive, avoiding O(n^2) behavior.
for i := 0; i < len(partition); i++ {
e := partition[i]
v := e[0]
// all values in this equiv class that are not equivalent to v get moved
// into another equiv class.
// To avoid allocating while building that equivalence class,
// move the values equivalent to v to the beginning of e,
// other values to the end of e, and track where the split is.
allvals := e
split := len(e)
eqloop:
for j := 1; j < len(e); {
w := e[j]
for i := 0; i < len(v.Args); i++ {
if valueEqClass[v.Args[i].ID] != valueEqClass[w.Args[i].ID] || !v.Type.Equal(w.Type) {
// w is not equivalent to v.
// move it to the end, shrink e, and move the split.
e[j], e[len(e)-1] = e[len(e)-1], e[j]
e = e[:len(e)-1]
split--
valueEqClass[w.ID] = len(partition)
changed = true
continue eqloop
}
}
// v and w are equivalent. Keep w in e.
j++
}
partition[i] = e
if split < len(allvals) {
partition = append(partition, allvals[split:])
}
}
if !changed {
break
}
}
// Compute dominator tree
idom := dominators(f)
sdom := newSparseTree(f, idom)
// Compute substitutions we would like to do. We substitute v for w
// if v and w are in the same equivalence class and v dominates w.
rewrite := make([]*Value, f.NumValues())
for _, e := range partition {
sort.Sort(e) // ensure deterministic ordering
for len(e) > 1 {
// Find a maximal dominant element in e
v := e[0]
for _, w := range e[1:] {
if sdom.isAncestorEq(w.Block, v.Block) {
v = w
}
}
// Replace all elements of e which v dominates
for i := 0; i < len(e); {
w := e[i]
if w == v {
e, e[i] = e[:len(e)-1], e[len(e)-1]
} else if sdom.isAncestorEq(v.Block, w.Block) {
rewrite[w.ID] = v
e, e[i] = e[:len(e)-1], e[len(e)-1]
} else {
i++
}
}
// TODO(khr): if value is a control value, do we need to keep it block-local?
}
}
// Apply substitutions
for _, b := range f.Blocks {
for _, v := range b.Values {
for i, w := range v.Args {
if x := rewrite[w.ID]; x != nil {
v.SetArg(i, x)
}
}
}
}
}
// returns true if b dominates c.
// simple and iterative, has O(depth) complexity in tall trees.
func dom(b, c *Block, idom []*Block) bool {
// Walk up from c in the dominator tree looking for b.
for c != nil {
if c == b {
return true
}
c = idom[c.ID]
}
// Reached the entry block, never saw b.
return false
}
// An eqclass approximates an equivalence class. During the
// algorithm it may represent the union of several of the
// final equivalence classes.
type eqclass []*Value
// Sort an equivalence class by value ID.
func (e eqclass) Len() int { return len(e) }
func (e eqclass) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e eqclass) Less(i, j int) bool { return e[i].ID < e[j].ID }