2016-11-10 16:03:47 -05:00
|
|
|
// Copyright 2016 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package ssa
|
|
|
|
|
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
import (
|
|
|
|
|
"cmd/compile/internal/types"
|
|
|
|
|
"fmt"
|
|
|
|
|
)
|
2016-11-10 16:03:47 -05:00
|
|
|
|
2017-02-02 11:53:41 -05:00
|
|
|
// an edgeMem records a backedge, together with the memory
|
|
|
|
|
// phi functions at the target of the backedge that must
|
2016-11-10 16:03:47 -05:00
|
|
|
// be updated when a rescheduling check replaces the backedge.
|
2017-02-02 11:53:41 -05:00
|
|
|
type edgeMem struct {
|
2016-11-10 16:03:47 -05:00
|
|
|
e Edge
|
|
|
|
|
m *Value // phi for memory at dest of e
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// a rewriteTarget is a a value-argindex pair indicating
|
|
|
|
|
// where a rewrite is applied. Note that this is for values,
|
|
|
|
|
// not for block controls, because block controls are not targets
|
|
|
|
|
// for the rewrites performed in inserting rescheduling checks.
|
|
|
|
|
type rewriteTarget struct {
|
|
|
|
|
v *Value
|
|
|
|
|
i int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type rewrite struct {
|
|
|
|
|
before, after *Value // before is the expected value before rewrite, after is the new value installed.
|
|
|
|
|
rewrites []rewriteTarget // all the targets for this rewrite.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (r *rewrite) String() string {
|
|
|
|
|
s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
|
|
|
|
|
for _, rw := range r.rewrites {
|
|
|
|
|
s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
|
|
|
|
|
}
|
|
|
|
|
s += "\n"
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
|
|
|
|
|
func insertLoopReschedChecks(f *Func) {
|
|
|
|
|
// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.
|
|
|
|
|
|
2017-02-02 11:53:41 -05:00
|
|
|
// Loop reschedule checks compare the stack pointer with
|
|
|
|
|
// the per-g stack bound. If the pointer appears invalid,
|
|
|
|
|
// that means a reschedule check is needed.
|
2016-11-10 16:03:47 -05:00
|
|
|
//
|
|
|
|
|
// Steps:
|
|
|
|
|
// 1. locate backedges.
|
|
|
|
|
// 2. Record memory definitions at block end so that
|
2017-02-02 11:53:41 -05:00
|
|
|
// the SSA graph for mem can be properly modified.
|
|
|
|
|
// 3. Ensure that phi functions that will-be-needed for mem
|
2016-11-10 16:03:47 -05:00
|
|
|
// are present in the graph, initially with trivial inputs.
|
2017-02-02 11:53:41 -05:00
|
|
|
// 4. Record all to-be-modified uses of mem;
|
2016-11-10 16:03:47 -05:00
|
|
|
// apply modifications (split into two steps to simplify and
|
|
|
|
|
// avoided nagging order-dependences).
|
2017-02-02 11:53:41 -05:00
|
|
|
// 5. Rewrite backedges to include reschedule check,
|
2016-11-10 16:03:47 -05:00
|
|
|
// and modify destination phi function appropriately with new
|
2017-02-02 11:53:41 -05:00
|
|
|
// definitions for mem.
|
2016-11-10 16:03:47 -05:00
|
|
|
|
|
|
|
|
if f.NoSplit { // nosplit functions don't reschedule.
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
backedges := backedges(f)
|
|
|
|
|
if len(backedges) == 0 { // no backedges means no rescheduling checks.
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lastMems := findLastMems(f)
|
|
|
|
|
|
|
|
|
|
idom := f.Idom()
|
2017-06-14 17:28:28 -04:00
|
|
|
po := f.postorder()
|
|
|
|
|
// The ordering in the dominator tree matters; it's important that
|
|
|
|
|
// the walk of the dominator tree also be a preorder (i.e., a node is
|
|
|
|
|
// visited only after all its non-backedge predecessors have been visited).
|
|
|
|
|
sdom := newSparseOrderedTree(f, idom, po)
|
2016-11-10 16:03:47 -05:00
|
|
|
|
2017-06-14 17:28:28 -04:00
|
|
|
if f.pass.debug > 1 {
|
2016-11-10 16:03:47 -05:00
|
|
|
fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-02 11:53:41 -05:00
|
|
|
tofixBackedges := []edgeMem{}
|
2016-11-10 16:03:47 -05:00
|
|
|
|
|
|
|
|
for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
|
2017-02-02 11:53:41 -05:00
|
|
|
tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// It's possible that there is no memory state (no global/pointer loads/stores or calls)
|
|
|
|
|
if lastMems[f.Entry.ID] == nil {
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
|
|
|
|
|
|
|
|
|
|
// Propagate last mem definitions forward through successor blocks.
|
|
|
|
|
for i := len(po) - 1; i >= 0; i-- {
|
|
|
|
|
b := po[i]
|
|
|
|
|
mem := lastMems[b.ID]
|
|
|
|
|
for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
|
|
|
|
|
// loop because there might be backedges that haven't been visited yet.
|
|
|
|
|
mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
|
|
|
|
|
}
|
|
|
|
|
memDefsAtBlockEnds[b.ID] = mem
|
2017-06-14 17:28:28 -04:00
|
|
|
if f.pass.debug > 2 {
|
|
|
|
|
fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
|
|
|
|
|
}
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Maps from block to newly-inserted phi function in block.
|
|
|
|
|
newmemphis := make(map[*Block]rewrite)
|
|
|
|
|
|
|
|
|
|
// Insert phi functions as necessary for future changes to flow graph.
|
|
|
|
|
for i, emc := range tofixBackedges {
|
|
|
|
|
e := emc.e
|
|
|
|
|
h := e.b
|
|
|
|
|
|
|
|
|
|
// find the phi function for the memory input at "h", if there is one.
|
|
|
|
|
var headerMemPhi *Value // look for header mem phi
|
|
|
|
|
|
|
|
|
|
for _, v := range h.Values {
|
|
|
|
|
if v.Op == OpPhi && v.Type.IsMemory() {
|
|
|
|
|
headerMemPhi = v
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if headerMemPhi == nil {
|
|
|
|
|
// if the header is nil, make a trivial phi from the dominator
|
|
|
|
|
mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
|
|
|
|
|
headerMemPhi = newPhiFor(h, mem0)
|
|
|
|
|
newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
|
2017-06-14 17:28:28 -04:00
|
|
|
addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)
|
2016-11-10 16:03:47 -05:00
|
|
|
|
|
|
|
|
}
|
|
|
|
|
tofixBackedges[i].m = headerMemPhi
|
|
|
|
|
|
|
|
|
|
}
|
2017-06-14 17:28:28 -04:00
|
|
|
if f.pass.debug > 0 {
|
|
|
|
|
for b, r := range newmemphis {
|
|
|
|
|
fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dfPhiTargets notes inputs to phis in dominance frontiers that should not
|
|
|
|
|
// be rewritten as part of the dominated children of some outer rewrite.
|
|
|
|
|
dfPhiTargets := make(map[rewriteTarget]bool)
|
2016-11-10 16:03:47 -05:00
|
|
|
|
2017-06-14 17:28:28 -04:00
|
|
|
rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)
|
2016-11-10 16:03:47 -05:00
|
|
|
|
|
|
|
|
if f.pass.debug > 0 {
|
|
|
|
|
for b, r := range newmemphis {
|
2017-06-14 17:28:28 -04:00
|
|
|
fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Apply collected rewrites.
|
|
|
|
|
for _, r := range newmemphis {
|
|
|
|
|
for _, rw := range r.rewrites {
|
|
|
|
|
rw.v.SetArg(rw.i, r.after)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Rewrite backedges to include reschedule checks.
|
|
|
|
|
for _, emc := range tofixBackedges {
|
|
|
|
|
e := emc.e
|
|
|
|
|
headerMemPhi := emc.m
|
|
|
|
|
h := e.b
|
|
|
|
|
i := e.i
|
|
|
|
|
p := h.Preds[i]
|
|
|
|
|
bb := p.b
|
|
|
|
|
mem0 := headerMemPhi.Args[i]
|
|
|
|
|
// bb e->p h,
|
|
|
|
|
// Because we're going to insert a rare-call, make sure the
|
|
|
|
|
// looping edge still looks likely.
|
|
|
|
|
likely := BranchLikely
|
|
|
|
|
if p.i != 0 {
|
|
|
|
|
likely = BranchUnlikely
|
|
|
|
|
}
|
|
|
|
|
bb.Likely = likely
|
|
|
|
|
|
|
|
|
|
// rewrite edge to include reschedule check
|
|
|
|
|
// existing edges:
|
|
|
|
|
//
|
|
|
|
|
// bb.Succs[p.i] == Edge{h, i}
|
|
|
|
|
// h.Preds[i] == p == Edge{bb,p.i}
|
|
|
|
|
//
|
|
|
|
|
// new block(s):
|
|
|
|
|
// test:
|
2017-02-02 11:53:41 -05:00
|
|
|
// if sp < g.limit { goto sched }
|
2016-11-10 16:03:47 -05:00
|
|
|
// goto join
|
|
|
|
|
// sched:
|
|
|
|
|
// mem1 := call resched (mem0)
|
|
|
|
|
// goto join
|
|
|
|
|
// join:
|
|
|
|
|
// mem2 := phi(mem0, mem1)
|
|
|
|
|
// goto h
|
|
|
|
|
//
|
|
|
|
|
// and correct arg i of headerMemPhi and headerCtrPhi
|
|
|
|
|
//
|
2017-02-02 11:53:41 -05:00
|
|
|
// EXCEPT: join block containing only phi functions is bad
|
2016-11-10 16:03:47 -05:00
|
|
|
// for the register allocator. Therefore, there is no
|
2017-02-02 11:53:41 -05:00
|
|
|
// join, and branches targeting join must instead target
|
2016-11-10 16:03:47 -05:00
|
|
|
// the header, and the other phi functions within header are
|
|
|
|
|
// adjusted for the additional input.
|
|
|
|
|
|
|
|
|
|
test := f.NewBlock(BlockIf)
|
|
|
|
|
sched := f.NewBlock(BlockPlain)
|
|
|
|
|
|
2017-02-01 09:35:27 -05:00
|
|
|
test.Pos = bb.Pos
|
|
|
|
|
sched.Pos = bb.Pos
|
2016-11-10 16:03:47 -05:00
|
|
|
|
2017-02-02 11:53:41 -05:00
|
|
|
// if sp < g.limit { goto sched }
|
|
|
|
|
// goto header
|
|
|
|
|
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
cfgtypes := &f.Config.Types
|
|
|
|
|
pt := cfgtypes.Uintptr
|
2017-02-02 11:53:41 -05:00
|
|
|
g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
|
|
|
|
|
sp := test.NewValue0(bb.Pos, OpSP, pt)
|
|
|
|
|
cmpOp := OpLess64U
|
2017-04-28 00:19:49 +00:00
|
|
|
if pt.Size() == 4 {
|
2017-02-02 11:53:41 -05:00
|
|
|
cmpOp = OpLess32U
|
|
|
|
|
}
|
2017-04-28 00:19:49 +00:00
|
|
|
limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
|
2017-02-02 11:53:41 -05:00
|
|
|
lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
|
2016-11-10 16:03:47 -05:00
|
|
|
test.SetControl(cmp)
|
2017-02-02 11:53:41 -05:00
|
|
|
|
|
|
|
|
// if true, goto sched
|
|
|
|
|
test.AddEdgeTo(sched)
|
|
|
|
|
|
|
|
|
|
// if false, rewrite edge to header.
|
2016-11-10 16:03:47 -05:00
|
|
|
// do NOT remove+add, because that will perturb all the other phi functions
|
|
|
|
|
// as well as messing up other edges to the header.
|
|
|
|
|
test.Succs = append(test.Succs, Edge{h, i})
|
|
|
|
|
h.Preds[i] = Edge{test, 1}
|
|
|
|
|
headerMemPhi.SetArg(i, mem0)
|
|
|
|
|
|
|
|
|
|
test.Likely = BranchUnlikely
|
|
|
|
|
|
|
|
|
|
// sched:
|
|
|
|
|
// mem1 := call resched (mem0)
|
|
|
|
|
// goto header
|
2017-03-16 22:42:10 -07:00
|
|
|
resched := f.fe.Syslook("goschedguarded")
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
mem1 := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeMem, resched, mem0)
|
2016-11-10 16:03:47 -05:00
|
|
|
sched.AddEdgeTo(h)
|
|
|
|
|
headerMemPhi.AddArg(mem1)
|
|
|
|
|
|
|
|
|
|
bb.Succs[p.i] = Edge{test, 0}
|
|
|
|
|
test.Preds = append(test.Preds, Edge{bb, p.i})
|
|
|
|
|
|
|
|
|
|
// Must correct all the other phi functions in the header for new incoming edge.
|
2017-02-02 11:53:41 -05:00
|
|
|
// Except for mem phis, it will be the same value seen on the original
|
2016-11-10 16:03:47 -05:00
|
|
|
// backedge at index i.
|
|
|
|
|
for _, v := range h.Values {
|
2017-02-02 11:53:41 -05:00
|
|
|
if v.Op == OpPhi && v != headerMemPhi {
|
2016-11-10 16:03:47 -05:00
|
|
|
v.AddArg(v.Args[i])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
f.invalidateCFG()
|
|
|
|
|
|
2017-06-14 17:28:28 -04:00
|
|
|
if f.pass.debug > 1 {
|
2016-11-10 16:03:47 -05:00
|
|
|
sdom = newSparseTree(f, f.Idom())
|
|
|
|
|
fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// newPhiFor inserts a new Phi function into b,
|
|
|
|
|
// with all inputs set to v.
|
|
|
|
|
func newPhiFor(b *Block, v *Value) *Value {
|
2017-02-01 09:35:27 -05:00
|
|
|
phiV := b.NewValue0(b.Pos, OpPhi, v.Type)
|
2016-11-10 16:03:47 -05:00
|
|
|
|
|
|
|
|
for range b.Preds {
|
|
|
|
|
phiV.AddArg(v)
|
|
|
|
|
}
|
|
|
|
|
return phiV
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
|
|
|
|
|
// in block h will replace a previous definition. Block b is the block currently being processed;
|
|
|
|
|
// if b has its own phi definition then it takes the place of h.
|
|
|
|
|
// defsForUses provides information about other definitions of the variable that are present
|
|
|
|
|
// (and if nil, indicates that the variable is no longer live)
|
2017-06-14 17:28:28 -04:00
|
|
|
// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
|
|
|
|
|
// The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
|
|
|
|
|
// requirement.
|
|
|
|
|
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
|
2016-11-10 16:03:47 -05:00
|
|
|
// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
|
|
|
|
|
if _, ok := newphis[b]; ok {
|
|
|
|
|
h = b
|
|
|
|
|
}
|
|
|
|
|
change := newphis[h]
|
|
|
|
|
x := change.before
|
|
|
|
|
y := change.after
|
|
|
|
|
|
|
|
|
|
// Apply rewrites to this block
|
|
|
|
|
if x != nil { // don't waste time on the common case of no definition.
|
|
|
|
|
p := &change.rewrites
|
|
|
|
|
for _, v := range b.Values {
|
|
|
|
|
if v == y { // don't rewrite self -- phi inputs are handled below.
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for i, w := range v.Args {
|
|
|
|
|
if w != x {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2017-06-14 17:28:28 -04:00
|
|
|
tgt := rewriteTarget{v, i}
|
|
|
|
|
|
|
|
|
|
// It's possible dominated control flow will rewrite this instead.
|
|
|
|
|
// Visiting in preorder (a property of how sdom was constructed)
|
|
|
|
|
// ensures that these are seen in the proper order.
|
|
|
|
|
if dfPhiTargets[tgt] {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
*p = append(*p, tgt)
|
|
|
|
|
if f.pass.debug > 1 {
|
|
|
|
|
fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
|
|
|
|
|
h, b, x, y, v, i)
|
|
|
|
|
}
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Rewrite appropriate inputs of phis reached in successors
|
|
|
|
|
// in dominance frontier, self, and dominated.
|
|
|
|
|
// If the variable def reaching uses in b is itself defined in b, then the new phi function
|
|
|
|
|
// does not reach the successors of b. (This assumes a bit about the structure of the
|
2017-02-02 11:53:41 -05:00
|
|
|
// phi use-def graph, but it's true for memory.)
|
2016-11-10 16:03:47 -05:00
|
|
|
if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
|
|
|
|
|
for _, e := range b.Succs {
|
|
|
|
|
s := e.b
|
2017-06-14 17:28:28 -04:00
|
|
|
|
2016-11-10 16:03:47 -05:00
|
|
|
for _, v := range s.Values {
|
|
|
|
|
if v.Op == OpPhi && v.Args[e.i] == x {
|
2017-06-14 17:28:28 -04:00
|
|
|
tgt := rewriteTarget{v, e.i}
|
|
|
|
|
*p = append(*p, tgt)
|
|
|
|
|
dfPhiTargets[tgt] = true
|
|
|
|
|
if f.pass.debug > 1 {
|
|
|
|
|
fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
|
|
|
|
|
h, b, s, x, y, v.LongString(), e.i)
|
|
|
|
|
}
|
2016-11-10 16:03:47 -05:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
newphis[h] = change
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
|
2017-06-14 17:28:28 -04:00
|
|
|
rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
|
|
|
|
|
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
|
|
|
|
|
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
|
|
|
|
|
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
|
|
|
|
|
// either b = h or h strictly dominates b.
|
|
|
|
|
// These newly created phis are themselves new definitions that may require addition of their
|
|
|
|
|
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
|
2017-06-14 17:28:28 -04:00
|
|
|
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
|
2016-11-10 16:03:47 -05:00
|
|
|
oldv := defForUses[b.ID]
|
|
|
|
|
if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
idom := f.Idom()
|
|
|
|
|
outer:
|
|
|
|
|
for _, e := range b.Succs {
|
|
|
|
|
s := e.b
|
|
|
|
|
// check phi functions in the dominance frontier
|
|
|
|
|
if sdom.isAncestor(h, s) {
|
|
|
|
|
continue // h dominates s, successor of b, therefore s is not in the frontier.
|
|
|
|
|
}
|
|
|
|
|
if _, ok := newphis[s]; ok {
|
|
|
|
|
continue // successor s of b already has a new phi function, so there is no need to add another.
|
|
|
|
|
}
|
|
|
|
|
if x != nil {
|
|
|
|
|
for _, v := range s.Values {
|
|
|
|
|
if v.Op == OpPhi && v.Args[e.i] == x {
|
|
|
|
|
continue outer // successor s of b has an old phi function, so there is no need to add another.
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
|
|
|
|
|
headerPhi := newPhiFor(s, old)
|
|
|
|
|
// the new phi will replace "old" in block s and all blocks dominated by s.
|
|
|
|
|
newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
|
2017-06-14 17:28:28 -04:00
|
|
|
addDFphis(old, s, s, f, defForUses, newphis, sdom) // the new definition may also create new phi functions.
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
|
2017-06-14 17:28:28 -04:00
|
|
|
addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
|
2016-11-10 16:03:47 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// findLastMems maps block ids to last memory-output op in a block, if any
|
|
|
|
|
func findLastMems(f *Func) []*Value {
|
|
|
|
|
|
|
|
|
|
var stores []*Value
|
|
|
|
|
lastMems := make([]*Value, f.NumBlocks())
|
|
|
|
|
storeUse := f.newSparseSet(f.NumValues())
|
|
|
|
|
defer f.retSparseSet(storeUse)
|
|
|
|
|
for _, b := range f.Blocks {
|
|
|
|
|
// Find all the stores in this block. Categorize their uses:
|
|
|
|
|
// storeUse contains stores which are used by a subsequent store.
|
|
|
|
|
storeUse.clear()
|
|
|
|
|
stores = stores[:0]
|
|
|
|
|
var memPhi *Value
|
|
|
|
|
for _, v := range b.Values {
|
|
|
|
|
if v.Op == OpPhi {
|
|
|
|
|
if v.Type.IsMemory() {
|
|
|
|
|
memPhi = v
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if v.Type.IsMemory() {
|
|
|
|
|
stores = append(stores, v)
|
|
|
|
|
for _, a := range v.Args {
|
|
|
|
|
if a.Block == b && a.Type.IsMemory() {
|
|
|
|
|
storeUse.add(a.ID)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(stores) == 0 {
|
|
|
|
|
lastMems[b.ID] = memPhi
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// find last store in the block
|
|
|
|
|
var last *Value
|
|
|
|
|
for _, v := range stores {
|
|
|
|
|
if storeUse.contains(v.ID) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if last != nil {
|
|
|
|
|
b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
|
|
|
|
|
}
|
|
|
|
|
last = v
|
|
|
|
|
}
|
|
|
|
|
if last == nil {
|
|
|
|
|
b.Fatalf("no last store found - cycle?")
|
|
|
|
|
}
|
|
|
|
|
lastMems[b.ID] = last
|
|
|
|
|
}
|
|
|
|
|
return lastMems
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type backedgesState struct {
|
|
|
|
|
b *Block
|
|
|
|
|
i int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// backedges returns a slice of successor edges that are back
|
|
|
|
|
// edges. For reducible loops, edge.b is the header.
|
|
|
|
|
func backedges(f *Func) []Edge {
|
|
|
|
|
edges := []Edge{}
|
|
|
|
|
mark := make([]markKind, f.NumBlocks())
|
|
|
|
|
stack := []backedgesState{}
|
|
|
|
|
|
|
|
|
|
mark[f.Entry.ID] = notExplored
|
|
|
|
|
stack = append(stack, backedgesState{f.Entry, 0})
|
|
|
|
|
|
|
|
|
|
for len(stack) > 0 {
|
|
|
|
|
l := len(stack)
|
|
|
|
|
x := stack[l-1]
|
|
|
|
|
if x.i < len(x.b.Succs) {
|
|
|
|
|
e := x.b.Succs[x.i]
|
|
|
|
|
stack[l-1].i++
|
|
|
|
|
s := e.b
|
|
|
|
|
if mark[s.ID] == notFound {
|
|
|
|
|
mark[s.ID] = notExplored
|
|
|
|
|
stack = append(stack, backedgesState{s, 0})
|
|
|
|
|
} else if mark[s.ID] == notExplored {
|
|
|
|
|
edges = append(edges, e)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
mark[x.b.ID] = done
|
|
|
|
|
stack = stack[0 : l-1]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return edges
|
|
|
|
|
}
|