go/src/cmd/compile/internal/ssa/trim.go

173 lines
4.2 KiB
Go
Raw Normal View History

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "cmd/internal/src"
// trim removes blocks with no code in them.
// These blocks were inserted to remove critical edges.
func trim(f *Func) {
n := 0
for _, b := range f.Blocks {
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
if !trimmableBlock(b) {
f.Blocks[n] = b
n++
continue
}
bPos := b.Pos
bIsStmt := bPos.IsStmt() == src.PosIsStmt
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
// Splice b out of the graph. NOTE: `mergePhi` depends on the
// order, in which the predecessors edges are merged here.
p, i := b.Preds[0].b, b.Preds[0].i
s, j := b.Succs[0].b, b.Succs[0].i
ns := len(s.Preds)
p.Succs[i] = Edge{s, j}
s.Preds[j] = Edge{p, i}
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
for _, e := range b.Preds[1:] {
p, i := e.b, e.i
p.Succs[i] = Edge{s, len(s.Preds)}
s.Preds = append(s.Preds, Edge{p, i})
}
// Attempt to preserve a statement boundary
if bIsStmt {
sawStmt := false
for _, v := range s.Values {
if isPoorStatementOp(v.Op) {
continue
}
if v.Pos.SameFileAndLine(bPos) {
v.Pos = v.Pos.WithIsStmt()
}
sawStmt = true
break
}
if !sawStmt && s.Pos.SameFileAndLine(bPos) {
s.Pos = s.Pos.WithIsStmt()
}
}
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
// If `s` had more than one predecessor, update its phi-ops to
// account for the merge.
if ns > 1 {
for _, v := range s.Values {
if v.Op == OpPhi {
mergePhi(v, j, b)
}
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
}
// Remove the phi-ops from `b` if they were merged into the
// phi-ops of `s`.
k := 0
for _, v := range b.Values {
if v.Op == OpPhi {
if v.Uses == 0 {
v.resetArgs()
continue
}
// Pad the arguments of the remaining phi-ops so
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
// they match the new predecessor count of `s`.
// Since s did not have a Phi op corresponding to
// the phi op in b, the other edges coming into s
// must be loopback edges from s, so v is the right
// argument to v!
args := make([]*Value, len(v.Args))
copy(args, v.Args)
v.resetArgs()
for x := 0; x < j; x++ {
v.AddArg(v)
}
v.AddArg(args[0])
for x := j + 1; x < ns; x++ {
v.AddArg(v)
}
for _, a := range args[1:] {
v.AddArg(a)
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
}
}
b.Values[k] = v
k++
}
b.Values = b.Values[:k]
}
// Merge the blocks' values.
for _, v := range b.Values {
v.Block = s
}
k := len(b.Values)
m := len(s.Values)
for i := 0; i < k; i++ {
s.Values = append(s.Values, nil)
}
copy(s.Values[k:], s.Values[:m])
copy(s.Values, b.Values)
}
if n < len(f.Blocks) {
cmd/compile: improve tighten pass Move a value to the block which is the lowest common ancestor in the dominator tree of all of its uses. Make sure not to move a value into a loop. Makes the tighten pass on average (across go1 benchmarks) 40% slower. Still not a big contributor to overall compile time. Binary size is just a tad smaller. name old time/op new time/op delta BinaryTree17-12 2.77s ± 9% 2.76s ± 9% ~ (p=0.878 n=8+8) Fannkuch11-12 2.75s ± 1% 2.74s ± 1% ~ (p=0.232 n=8+7) FmtFprintfEmpty-12 48.9ns ± 9% 47.7ns ± 0% ~ (p=0.431 n=8+8) FmtFprintfString-12 143ns ± 8% 142ns ± 1% ~ (p=0.257 n=8+7) FmtFprintfInt-12 123ns ± 1% 122ns ± 1% -1.04% (p=0.026 n=7+8) FmtFprintfIntInt-12 195ns ± 7% 185ns ± 0% -5.32% (p=0.000 n=8+8) FmtFprintfPrefixedInt-12 194ns ± 4% 195ns ± 0% +0.81% (p=0.015 n=7+7) FmtFprintfFloat-12 267ns ± 0% 268ns ± 0% +0.37% (p=0.001 n=7+6) FmtManyArgs-12 800ns ± 0% 762ns ± 1% -4.78% (p=0.000 n=8+8) GobDecode-12 7.67ms ± 2% 7.60ms ± 2% ~ (p=0.234 n=8+8) GobEncode-12 6.55ms ± 0% 6.57ms ± 1% ~ (p=0.336 n=7+8) Gzip-12 237ms ± 0% 238ms ± 0% +0.40% (p=0.017 n=7+7) Gunzip-12 40.8ms ± 0% 40.2ms ± 0% -1.52% (p=0.000 n=7+8) HTTPClientServer-12 208µs ± 3% 209µs ± 3% ~ (p=0.955 n=8+7) JSONEncode-12 16.2ms ± 1% 17.2ms ±11% +5.80% (p=0.001 n=7+8) JSONDecode-12 57.3ms ±12% 55.5ms ± 3% ~ (p=0.867 n=8+7) Mandelbrot200-12 4.68ms ± 6% 4.46ms ± 1% ~ (p=0.442 n=8+8) GoParse-12 4.27ms ±44% 3.42ms ± 1% -19.95% (p=0.005 n=8+8) RegexpMatchEasy0_32-12 75.1ns ± 0% 75.8ns ± 1% +0.99% (p=0.002 n=7+7) RegexpMatchEasy0_1K-12 963ns ± 0% 1021ns ± 6% +5.98% (p=0.001 n=7+7) RegexpMatchEasy1_32-12 72.4ns ±11% 70.8ns ± 1% ~ (p=0.368 n=8+8) RegexpMatchEasy1_1K-12 394ns ± 1% 399ns ± 0% +1.23% (p=0.000 n=8+7) RegexpMatchMedium_32-12 114ns ± 0% 115ns ± 1% +0.63% (p=0.021 n=7+7) RegexpMatchMedium_1K-12 35.9µs ± 0% 37.6µs ± 1% +4.72% (p=0.000 n=7+8) RegexpMatchHard_32-12 1.93µs ± 2% 1.91µs ± 0% -0.91% (p=0.001 n=7+7) RegexpMatchHard_1K-12 60.2µs ± 3% 61.2µs ±10% ~ (p=0.442 n=8+8) Revcomp-12 404ms ± 1% 406ms ± 1% ~ (p=0.054 n=8+7) Template-12 64.6ms ± 1% 63.5ms ± 1% -1.66% (p=0.000 n=8+8) TimeParse-12 347ns ± 8% 309ns ± 0% -11.13% (p=0.000 n=8+7) TimeFormat-12 343ns ± 4% 331ns ± 0% -3.34% (p=0.000 n=8+7) Change-Id: Id6da1239ddd4d0cb074ff29cffb06302d1c6d08f Reviewed-on: https://go-review.googlesource.com/28712 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2016-09-07 14:04:31 -07:00
f.invalidateCFG()
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
tail := f.Blocks[n:]
for i := range tail {
tail[i] = nil
}
f.Blocks = f.Blocks[:n]
}
}
// emptyBlock reports whether the block does not contain actual
// instructions.
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
func emptyBlock(b *Block) bool {
for _, v := range b.Values {
if v.Op != OpPhi {
return false
}
}
return true
}
// trimmableBlock reports whether the block can be trimmed from the CFG,
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
// subject to the following criteria:
// - it should not be the first block.
// - it should be BlockPlain.
// - it should not loop back to itself.
// - it either is the single predecessor of the successor block or
// contains no actual instructions.
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
func trimmableBlock(b *Block) bool {
if b.Kind != BlockPlain || b == b.Func.Entry {
return false
}
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
s := b.Succs[0].b
return s != b && (len(s.Preds) == 1 || emptyBlock(b))
}
// mergePhi adjusts the number of `v`s arguments to account for merge
// of `b`, which was `i`th predecessor of the `v`s block.
func mergePhi(v *Value, i int, b *Block) {
cmd/compile: trim more blocks - trim blocks with multiple predecessors - trim blocks, which contain only phi-functions - trim blocks, which can be merged into the successor block As an example, compiling the following source: ---8<------ package p type Node struct { Key int Left, Right *Node } func Search(r *Node, k int) *Node { for r != nil { switch { case k == r.Key: return r case k < r.Key: r = r.Left default: r = r.Right } } return nil } ---8<------ with `GOSSAFUNC=Search" go tool compile t.go`, results in the following code: ---8<------ genssa 00000 (t.go:8) TEXT "".Search(SB), $0 00001 (t.go:8) FUNCDATA $0, "".gcargs·0(SB) 00002 (t.go:8) FUNCDATA $1, "".gclocals·1(SB) 00003 (t.go:8) TYPE "".r(FP)type.*"".Node, $8 00004 (t.go:8) TYPE "".k+8(FP)type.int, $8 00005 (t.go:8) TYPE "".~r2+16(FP)type.*"".Node, $8 v40 00006 (t.go:9) MOVQ "".k+8(FP), AX v34 00007 (t.go:9) MOVQ "".r(FP), CX v33 00008 (t.go:9) TESTQ CX, CX b2 00009 (t.go:9) JEQ $0, 22 v16 00010 (t.go:11) MOVQ (CX), DX v21 00011 (t.go:11) CMPQ DX, AX b9 00012 (t.go:11) JEQ $0, 19 v64 00013 (t.go:13) CMPQ AX, DX b13 00014 (t.go:13) JGE 17 v36 00015 (t.go:14) MOVQ 8(CX), CX b4 00016 (t.go:9) JMP 8 <---+ v42 00017 (t.go:16) MOVQ 16(CX), CX | b21 00018 (t.go:10) JMP 16 ----+ v28 00019 (t.go:12) VARDEF "".~r2+16(FP) v29 00020 (t.go:12) MOVQ CX, "".~r2+16(FP) b10 00021 (t.go:12) RET v44 00022 (t.go:19) VARDEF "".~r2+16(FP) v45 00023 (t.go:19) MOVQ $0, "".~r2+16(FP) b5 00024 (t.go:19) RET 00025 (<unknown line number>) END ---8<------ Note the jump at 18 jumps to another jump at 16. Looking at the function after trimming: --8<------ after trim [199 ns] b1: v1 = InitMem <mem> v2 = SP <uintptr> : SP v67 = Arg <*Node> {r} : r[*Node] v59 = Arg <int> {k} : k[int] v40 = LoadReg <int> v59 : AX v34 = LoadReg <*Node> v67 : CX Plain → b2 b2: ← b1 b4 v8 = Phi <*Node> v34 v68 : CX v33 = TESTQ <flags> v8 v8 NE v33 → b9 b5 (likely) b9: ← b2 v16 = MOVQload <int> v8 v1 : DX v21 = CMPQ <flags> v16 v40 EQ v21 → b10 b13 (unlikely) b13: ← b9 v64 = CMPQ <flags> v40 v16 LT v64 → b19 b21 b19: ← b13 v36 = MOVQload <*Node> [8] v8 v1 : CX Plain → b4 b4: ← b21 b19 < v68 = Phi <*Node> v42 v36 : CX <- no actual code Plain → b2 < b21: ← b13 v42 = MOVQload <*Node> [16] v8 v1 : CX Plain → b4 b10: ← b9 v28 = VarDef <mem> {~r2} v1 v29 = MOVQstore <mem> {~r2} v2 v8 v28 v30 = Copy <mem> v29 Ret v30 b5: ← b2 v44 = VarDef <mem> {~r2} v1 v45 = MOVQstoreconst <mem> {~r2} [val=0,off=0] v2 v44 v47 = Copy <mem> v45 Ret v47 --8<------ The jump at 16 corresponds to the edge b21 -> b4. The block b4 contains only phi-ops, i.e. no actual code besides the jump to b2. However b4 is not trimmed, because it a) has more than one predecessor, and b) it is not empty. This change enhances trim.go to remove more blocks, subject to the following criteria: - block has predecessors (i.e. not the start block) - block is BlockPlain - block does not loop back to itself - block is the single predecessor of its successor; the instructions of the block are merged into the successor - block does no emit actual code, besides a possible unconditional jump. Currently only OpPhi are considered to not be actual code, perhaps OpKeepAlive/others should be considered too? As an example, after the change, the block b4 is trimmed and the jump at 18 jumps directly to 8. Revision 1: Adjust phi-ops arguments after merge Ensure the number of phi-ops arguments matches the new number of predecessors in the merged block. When moving values, make them refer to the merged block. Revision 2: - Make clear the intent that we do not want to trim the entry block - Double check that we are merging a phi operation - Minor code style fix - Fix a potentially dangerous situation when a blocks refers to the inline value space in another block Change-Id: I0ab91779f931f404d11008f5c45606d985d7fbaa Reviewed-on: https://go-review.googlesource.com/28812 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-09-07 15:10:07 +03:00
u := v.Args[i]
if u.Block == b {
if u.Op != OpPhi {
b.Func.Fatalf("value %s is not a phi operation", u.LongString())
}
// If the original block contained u = φ(u0, u1, ..., un) and
// the current phi is
// v = φ(v0, v1, ..., u, ..., vk)
// then the merged phi is
// v = φ(v0, v1, ..., u0, ..., vk, u1, ..., un)
v.SetArg(i, u.Args[0])
v.AddArgs(u.Args[1:]...)
} else {
// If the original block contained u = φ(u0, u1, ..., un) and
// the current phi is
// v = φ(v0, v1, ..., vi, ..., vk)
// i.e. it does not use a value from the predecessor block,
// then the merged phi is
// v = φ(v0, v1, ..., vk, vi, vi, ...)
for j := 1; j < len(b.Preds); j++ {
v.AddArg(v.Args[i])
}
}
}