go/src/cmd/compile/internal/ssa/block.go

422 lines
11 KiB
Go
Raw Normal View History

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/internal/src"
"fmt"
)
// Block represents a basic block in the control flow graph of a function.
type Block struct {
// A unique identifier for the block. The system will attempt to allocate
// these IDs densely, but no guarantees.
ID ID
// Source position for block's control operation
2016-12-15 17:17:01 -08:00
Pos src.XPos
// The kind of block this is.
Kind BlockKind
// Likely direction for branches.
// If BranchLikely, Succs[0] is the most likely branch taken.
// If BranchUnlikely, Succs[1] is the most likely branch taken.
// Ignored if len(Succs) < 2.
// Fatal if not BranchUnknown and len(Succs) > 2.
Likely BranchPrediction
// After flagalloc, records whether flags are live at the end of the block.
FlagsLiveAtEnd bool
// Subsequent blocks, if any. The number and order depend on the block kind.
Succs []Edge
// Inverse of successors.
// The order is significant to Phi nodes in the block.
// TODO: predecessors is a pain to maintain. Can we somehow order phi
// arguments by block id and have this field computed explicitly when needed?
Preds []Edge
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
// A list of values that determine how the block is exited. The number
// and type of control values depends on the Kind of the block. For
// instance, a BlockIf has a single boolean control value and BlockExit
// has a single memory control value.
//
// The ControlValues() method may be used to get a slice with the non-nil
// control values that can be ranged over.
//
// Controls[1] must be nil if Controls[0] is nil.
Controls [2]*Value
// Auxiliary info for the block. Its value depends on the Kind.
Aux Aux
cmd/compile: add SSA rules for s390x compare-and-branch instructions This commit adds SSA rules for the s390x combined compare-and-branch instructions. These have a shorter encoding than separate compare and branch instructions and they also don't clobber the condition code (a.k.a. flag register) reducing pressure on the flag allocator. I have deleted the 'loop_test.go' file and replaced it with a new codegen test which performs a wider range of checks. Object sizes from compilebench: name old object-bytes new object-bytes delta Template 562kB ± 0% 561kB ± 0% -0.28% (p=0.000 n=10+10) Unicode 217kB ± 0% 217kB ± 0% -0.17% (p=0.000 n=10+10) GoTypes 2.03MB ± 0% 2.02MB ± 0% -0.59% (p=0.000 n=10+10) Compiler 8.16MB ± 0% 8.11MB ± 0% -0.62% (p=0.000 n=10+10) SSA 27.4MB ± 0% 27.0MB ± 0% -1.45% (p=0.000 n=10+10) Flate 356kB ± 0% 356kB ± 0% -0.12% (p=0.000 n=10+10) GoParser 438kB ± 0% 436kB ± 0% -0.51% (p=0.000 n=10+10) Reflect 1.37MB ± 0% 1.37MB ± 0% -0.42% (p=0.000 n=10+10) Tar 485kB ± 0% 483kB ± 0% -0.39% (p=0.000 n=10+10) XML 630kB ± 0% 621kB ± 0% -1.45% (p=0.000 n=10+10) [Geo mean] 1.14MB 1.13MB -0.60% name old text-bytes new text-bytes delta HelloSize 763kB ± 0% 754kB ± 0% -1.30% (p=0.000 n=10+10) CmdGoSize 10.7MB ± 0% 10.6MB ± 0% -0.91% (p=0.000 n=10+10) [Geo mean] 2.86MB 2.82MB -1.10% Change-Id: Ibca55d9c0aa1254aee69433731ab5d26a43a7c18 Reviewed-on: https://go-review.googlesource.com/c/go/+/198037 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2019-09-17 07:29:31 -07:00
AuxInt int64
// The unordered set of Values that define the operation of this block.
// After the scheduling pass, this list is ordered.
Values []*Value
// The containing function
Func *Func
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
// Storage for Succs, Preds and Values.
succstorage [2]Edge
predstorage [4]Edge
valstorage [9]*Value
}
// Edge represents a CFG edge.
// Example edges for b branching to either c or d.
// (c and d have other predecessors.)
//
//	b.Succs = [{c,3}, {d,1}]
//	c.Preds = [?, ?, ?, {b,0}]
//	d.Preds = [?, {b,1}, ?]
//
// These indexes allow us to edit the CFG in constant time.
// In addition, they inform phi ops in degenerate cases like:
//
//	b:
//	   if k then c else c
//	c:
//	   v = Phi(x, y)
//
// Then the indexes tell you whether x is chosen from
// the if or else branch from b.
//
//	b.Succs = [{c,0},{c,1}]
//	c.Preds = [{b,0},{b,1}]
//
// means x is chosen if k is true.
type Edge struct {
	// The block this edge goes to (in a Succs list) or comes from (in a Preds list).
	b *Block
	// Index of the reverse edge. Invariant:
	//   e := x.Succs[idx]
	//   e.b.Preds[e.i] == Edge{x,idx}
	// and similarly for predecessors.
	i int
}
// Block returns the block stored in the edge: the block the edge goes to
// when the edge comes from a Succs list, or the block it comes from when
// the edge comes from a Preds list.
func (e Edge) Block() *Block {
	return e.b
}
// Index returns the index of the reverse edge, i.e. the position of the
// matching entry in the opposite list (Preds or Succs) of the block
// returned by Block.
func (e Edge) Index() int {
	return e.i
}
// String formats the edge as "{block,index}", printing the block with %v
// and the reverse-edge index with %d.
func (e Edge) String() string {
	return fmt.Sprintf("{%v,%d}", e.b, e.i)
}
// BlockKind is the kind of SSA block.
//
//	kind    controls          successors
//	------------------------------------------
//	Exit    [return mem]      []
//	Plain   []                [next]
//	If      [boolean Value]   [then, else]
//	Defer   [mem]             [nopanic, panic]  (control opcode should be OpStaticCall to runtime.deferproc)
type BlockKind int16
// String returns the short form of the block: the letter "b" followed by
// the block's ID formatted with %d.
func (b *Block) String() string {
	return fmt.Sprintf("b%d", b.ID)
}
// long form print
func (b *Block) LongString() string {
s := b.Kind.String()
if b.Aux != nil {
cmd/compile: add SSA rules for s390x compare-and-branch instructions This commit adds SSA rules for the s390x combined compare-and-branch instructions. These have a shorter encoding than separate compare and branch instructions and they also don't clobber the condition code (a.k.a. flag register) reducing pressure on the flag allocator. I have deleted the 'loop_test.go' file and replaced it with a new codegen test which performs a wider range of checks. Object sizes from compilebench: name old object-bytes new object-bytes delta Template 562kB ± 0% 561kB ± 0% -0.28% (p=0.000 n=10+10) Unicode 217kB ± 0% 217kB ± 0% -0.17% (p=0.000 n=10+10) GoTypes 2.03MB ± 0% 2.02MB ± 0% -0.59% (p=0.000 n=10+10) Compiler 8.16MB ± 0% 8.11MB ± 0% -0.62% (p=0.000 n=10+10) SSA 27.4MB ± 0% 27.0MB ± 0% -1.45% (p=0.000 n=10+10) Flate 356kB ± 0% 356kB ± 0% -0.12% (p=0.000 n=10+10) GoParser 438kB ± 0% 436kB ± 0% -0.51% (p=0.000 n=10+10) Reflect 1.37MB ± 0% 1.37MB ± 0% -0.42% (p=0.000 n=10+10) Tar 485kB ± 0% 483kB ± 0% -0.39% (p=0.000 n=10+10) XML 630kB ± 0% 621kB ± 0% -1.45% (p=0.000 n=10+10) [Geo mean] 1.14MB 1.13MB -0.60% name old text-bytes new text-bytes delta HelloSize 763kB ± 0% 754kB ± 0% -1.30% (p=0.000 n=10+10) CmdGoSize 10.7MB ± 0% 10.6MB ± 0% -0.91% (p=0.000 n=10+10) [Geo mean] 2.86MB 2.82MB -1.10% Change-Id: Ibca55d9c0aa1254aee69433731ab5d26a43a7c18 Reviewed-on: https://go-review.googlesource.com/c/go/+/198037 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2019-09-17 07:29:31 -07:00
s += fmt.Sprintf(" {%s}", b.Aux)
}
if t := b.AuxIntString(); t != "" {
s += fmt.Sprintf(" [%s]", t)
}
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
for _, c := range b.ControlValues() {
s += fmt.Sprintf(" %s", c)
}
if len(b.Succs) > 0 {
s += " ->"
for _, c := range b.Succs {
s += " " + c.b.String()
}
}
switch b.Likely {
case BranchUnlikely:
s += " (unlikely)"
case BranchLikely:
s += " (likely)"
}
return s
}
[dev.ssa] cmd/compile/ssa: separate logging, work in progress, and fatal errors The SSA implementation logs for three purposes: * debug logging * fatal errors * unimplemented features Separating these three uses lets us attempt an SSA implementation for all functions, not just _ssa functions. This turns the entire standard library into a compilation test, and makes it easy to figure out things like "how much coverage does SSA have now" and "what should we do next to get more coverage?". Functions called _ssa are still special. They log profusely by default and the output of the SSA implementation is used. For all other functions, logging is off, and the implementation is built and discarded, due to lack of support for the runtime. While we're here, fix a few minor bugs and add some extra Unimplementeds to allow all.bash to pass. As of now, SSA handles 20.79% of the functions in the standard library (689 of 3314). The top missing features are: 10.03% 2597 SSA unimplemented: zero for type error not implemented 7.79% 2016 SSA unimplemented: addr: bad op DOTPTR 7.33% 1898 SSA unimplemented: unhandled expr EQ 6.10% 1579 SSA unimplemented: unhandled expr OROR 4.91% 1271 SSA unimplemented: unhandled expr NE 4.49% 1163 SSA unimplemented: unhandled expr LROT 4.00% 1036 SSA unimplemented: unhandled expr LEN 3.56% 923 SSA unimplemented: unhandled stmt CALLFUNC 2.37% 615 SSA unimplemented: zero for type []byte not implemented 1.90% 492 SSA unimplemented: unhandled stmt CALLMETH 1.74% 450 SSA unimplemented: unhandled expr CALLINTER 1.74% 450 SSA unimplemented: unhandled expr DOT 1.71% 444 SSA unimplemented: unhandled expr ANDAND 1.65% 426 SSA unimplemented: unhandled expr CLOSUREVAR 1.54% 400 SSA unimplemented: unhandled expr CALLMETH 1.51% 390 SSA unimplemented: unhandled stmt SWITCH 1.47% 380 SSA unimplemented: unhandled expr CONV 1.33% 345 SSA unimplemented: addr: bad op * 1.30% 336 SSA unimplemented: unhandled OLITERAL 6 Change-Id: I4ca07951e276714dc13c31de28640aead17a1be7 
Reviewed-on: https://go-review.googlesource.com/11160 Reviewed-by: Keith Randall <khr@golang.org>
2015-06-12 11:01:13 -07:00
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
// NumControls reports how many control values are set on the block.
// A nil Controls[0] yields 0 without looking at Controls[1].
func (b *Block) NumControls() int {
	switch {
	case b.Controls[0] == nil:
		return 0
	case b.Controls[1] == nil:
		return 1
	default:
		return 2
	}
}
// ControlValues returns the block's non-nil control values as a slice of
// the Controls array itself, so element i of the result is Controls[i]
// and the index can be passed straight to ReplaceControl.
func (b *Block) ControlValues() []*Value {
	n := 0
	if b.Controls[0] != nil {
		n = 1
		if b.Controls[1] != nil {
			n = 2
		}
	}
	return b.Controls[:n]
}
// SetControl removes all existing control values and then adds
// the control value provided. The number of control values after
// a call to SetControl will always be 1.
func (b *Block) SetControl(v *Value) {
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
b.ResetControls()
b.Controls[0] = v
v.Uses++
}
// ResetControls drops every control value from the block: each non-nil
// control has its use count decremented and its slot is set to nil.
func (b *Block) ResetControls() {
	// The range expression is evaluated once, so c is the value the slot
	// held before any slot was cleared.
	for i, c := range b.Controls {
		if c != nil {
			c.Uses--
		}
		b.Controls[i] = nil
	}
}
// AddControl stores v in the first free control slot, as reported by
// NumControls, and increments v's use count.
func (b *Block) AddControl(v *Value) {
	// Indexing past the end of the array panics when both slots are in use.
	b.Controls[b.NumControls()] = v
	v.Uses++
}
// ReplaceControl swaps the control value at index i for v. The use count
// of the old value is decremented and the use count of v is incremented.
// The index must refer to a valid (non-nil) control value.
func (b *Block) ReplaceControl(i int, v *Value) {
	slot := &b.Controls[i]
	(*slot).Uses--
	*slot = v
	v.Uses++
}
// CopyControls replaces the controls for this block with those from the
// provided block. The provided block is not modified.
func (b *Block) CopyControls(from *Block) {
if b == from {
return
}
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
b.ResetControls()
for _, c := range from.ControlValues() {
b.AddControl(c)
}
}
cmd/compile: reduce amount of code generated for block rewrite rules Add a Reset method to blocks that allows us to reduce the amount of code we generate for block rewrite rules. Thanks to Cherry for suggesting a similar fix to this in CL 196557. Compilebench result: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 1% -0.30% (p=0.028 n=19+20) Unicode 83.7ms ± 3% 83.0ms ± 2% -0.79% (p=0.029 n=18+19) GoTypes 757ms ± 1% 755ms ± 1% -0.31% (p=0.034 n=19+19) Compiler 3.51s ± 1% 3.50s ± 1% -0.20% (p=0.013 n=18+18) SSA 11.7s ± 1% 11.7s ± 1% -0.38% (p=0.000 n=19+19) Flate 131ms ± 1% 130ms ± 1% -0.32% (p=0.024 n=18+18) GoParser 162ms ± 1% 162ms ± 1% ~ (p=0.059 n=20+18) Reflect 471ms ± 0% 470ms ± 0% -0.24% (p=0.045 n=20+17) Tar 187ms ± 1% 186ms ± 1% ~ (p=0.157 n=20+20) XML 255ms ± 1% 255ms ± 1% ~ (p=0.461 n=19+20) LinkCompiler 754ms ± 2% 755ms ± 2% ~ (p=0.919 n=17+17) ExternalLinkCompiler 2.82s ±16% 2.37s ±10% -15.94% (p=0.000 n=20+20) LinkWithoutDebugCompiler 439ms ± 4% 442ms ± 6% ~ (p=0.461 n=18+19) StdCmd 25.8s ± 2% 25.5s ± 1% -0.95% (p=0.000 n=20+20) name old user-time/op new user-time/op delta Template 240ms ± 8% 238ms ± 7% ~ (p=0.301 n=20+20) Unicode 107ms ±18% 104ms ±13% ~ (p=0.149 n=20+20) GoTypes 883ms ± 3% 888ms ± 2% ~ (p=0.211 n=20+20) Compiler 4.22s ± 1% 4.20s ± 1% ~ (p=0.077 n=20+18) SSA 14.1s ± 1% 14.1s ± 2% ~ (p=0.192 n=20+20) Flate 145ms ±10% 148ms ± 5% ~ (p=0.126 n=20+18) GoParser 186ms ± 7% 186ms ± 7% ~ (p=0.779 n=20+20) Reflect 538ms ± 3% 541ms ± 3% ~ (p=0.192 n=20+20) Tar 218ms ± 4% 217ms ± 6% ~ (p=0.835 n=19+20) XML 298ms ± 5% 298ms ± 5% ~ (p=0.749 n=19+20) LinkCompiler 818ms ± 5% 825ms ± 8% ~ (p=0.461 n=20+20) ExternalLinkCompiler 1.55s ± 4% 1.53s ± 5% ~ (p=0.063 n=20+18) LinkWithoutDebugCompiler 460ms ±12% 460ms ± 7% ~ (p=0.925 n=20+20) name old object-bytes new object-bytes delta Template 554kB ± 0% 554kB ± 0% ~ (all equal) Unicode 215kB ± 0% 215kB ± 0% ~ (all equal) GoTypes 2.01MB ± 0% 2.01MB ± 0% ~ (all equal) Compiler 7.97MB ± 0% 
7.97MB ± 0% +0.00% (p=0.000 n=20+20) SSA 26.8MB ± 0% 26.9MB ± 0% +0.27% (p=0.000 n=20+20) Flate 340kB ± 0% 340kB ± 0% ~ (all equal) GoParser 434kB ± 0% 434kB ± 0% ~ (all equal) Reflect 1.34MB ± 0% 1.34MB ± 0% ~ (all equal) Tar 480kB ± 0% 480kB ± 0% ~ (all equal) XML 622kB ± 0% 622kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 20.4kB ± 0% 20.4kB ± 0% ~ (all equal) Unicode 8.21kB ± 0% 8.21kB ± 0% ~ (all equal) GoTypes 36.6kB ± 0% 36.6kB ± 0% ~ (all equal) Compiler 115kB ± 0% 115kB ± 0% +0.08% (p=0.000 n=20+20) SSA 141kB ± 0% 141kB ± 0% +0.07% (p=0.000 n=20+20) Flate 5.11kB ± 0% 5.11kB ± 0% ~ (all equal) GoParser 8.93kB ± 0% 8.93kB ± 0% ~ (all equal) Reflect 11.8kB ± 0% 11.8kB ± 0% ~ (all equal) Tar 10.9kB ± 0% 10.9kB ± 0% ~ (all equal) XML 17.4kB ± 0% 17.4kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 742kB ± 0% 742kB ± 0% ~ (all equal) CmdGoSize 10.7MB ± 0% 10.7MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.10MB ± 0% 1.10MB ± 0% ~ (all equal) CmdGoSize 14.9MB ± 0% 14.9MB ± 0% ~ (all equal) Change-Id: Ic89a8e62423b3d9fd9391159e0663acf450803b5 Reviewed-on: https://go-review.googlesource.com/c/go/+/198419 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
2019-10-02 11:02:41 +01:00
// Reset sets the block to the provided kind and clears all the blocks control
// and auxiliary values. Other properties of the block, such as its successors,
cmd/compile: reduce amount of code generated for block rewrite rules Add a Reset method to blocks that allows us to reduce the amount of code we generate for block rewrite rules. Thanks to Cherry for suggesting a similar fix to this in CL 196557. Compilebench result: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 1% -0.30% (p=0.028 n=19+20) Unicode 83.7ms ± 3% 83.0ms ± 2% -0.79% (p=0.029 n=18+19) GoTypes 757ms ± 1% 755ms ± 1% -0.31% (p=0.034 n=19+19) Compiler 3.51s ± 1% 3.50s ± 1% -0.20% (p=0.013 n=18+18) SSA 11.7s ± 1% 11.7s ± 1% -0.38% (p=0.000 n=19+19) Flate 131ms ± 1% 130ms ± 1% -0.32% (p=0.024 n=18+18) GoParser 162ms ± 1% 162ms ± 1% ~ (p=0.059 n=20+18) Reflect 471ms ± 0% 470ms ± 0% -0.24% (p=0.045 n=20+17) Tar 187ms ± 1% 186ms ± 1% ~ (p=0.157 n=20+20) XML 255ms ± 1% 255ms ± 1% ~ (p=0.461 n=19+20) LinkCompiler 754ms ± 2% 755ms ± 2% ~ (p=0.919 n=17+17) ExternalLinkCompiler 2.82s ±16% 2.37s ±10% -15.94% (p=0.000 n=20+20) LinkWithoutDebugCompiler 439ms ± 4% 442ms ± 6% ~ (p=0.461 n=18+19) StdCmd 25.8s ± 2% 25.5s ± 1% -0.95% (p=0.000 n=20+20) name old user-time/op new user-time/op delta Template 240ms ± 8% 238ms ± 7% ~ (p=0.301 n=20+20) Unicode 107ms ±18% 104ms ±13% ~ (p=0.149 n=20+20) GoTypes 883ms ± 3% 888ms ± 2% ~ (p=0.211 n=20+20) Compiler 4.22s ± 1% 4.20s ± 1% ~ (p=0.077 n=20+18) SSA 14.1s ± 1% 14.1s ± 2% ~ (p=0.192 n=20+20) Flate 145ms ±10% 148ms ± 5% ~ (p=0.126 n=20+18) GoParser 186ms ± 7% 186ms ± 7% ~ (p=0.779 n=20+20) Reflect 538ms ± 3% 541ms ± 3% ~ (p=0.192 n=20+20) Tar 218ms ± 4% 217ms ± 6% ~ (p=0.835 n=19+20) XML 298ms ± 5% 298ms ± 5% ~ (p=0.749 n=19+20) LinkCompiler 818ms ± 5% 825ms ± 8% ~ (p=0.461 n=20+20) ExternalLinkCompiler 1.55s ± 4% 1.53s ± 5% ~ (p=0.063 n=20+18) LinkWithoutDebugCompiler 460ms ±12% 460ms ± 7% ~ (p=0.925 n=20+20) name old object-bytes new object-bytes delta Template 554kB ± 0% 554kB ± 0% ~ (all equal) Unicode 215kB ± 0% 215kB ± 0% ~ (all equal) GoTypes 2.01MB ± 0% 2.01MB ± 0% ~ (all equal) Compiler 7.97MB ± 0% 
7.97MB ± 0% +0.00% (p=0.000 n=20+20) SSA 26.8MB ± 0% 26.9MB ± 0% +0.27% (p=0.000 n=20+20) Flate 340kB ± 0% 340kB ± 0% ~ (all equal) GoParser 434kB ± 0% 434kB ± 0% ~ (all equal) Reflect 1.34MB ± 0% 1.34MB ± 0% ~ (all equal) Tar 480kB ± 0% 480kB ± 0% ~ (all equal) XML 622kB ± 0% 622kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 20.4kB ± 0% 20.4kB ± 0% ~ (all equal) Unicode 8.21kB ± 0% 8.21kB ± 0% ~ (all equal) GoTypes 36.6kB ± 0% 36.6kB ± 0% ~ (all equal) Compiler 115kB ± 0% 115kB ± 0% +0.08% (p=0.000 n=20+20) SSA 141kB ± 0% 141kB ± 0% +0.07% (p=0.000 n=20+20) Flate 5.11kB ± 0% 5.11kB ± 0% ~ (all equal) GoParser 8.93kB ± 0% 8.93kB ± 0% ~ (all equal) Reflect 11.8kB ± 0% 11.8kB ± 0% ~ (all equal) Tar 10.9kB ± 0% 10.9kB ± 0% ~ (all equal) XML 17.4kB ± 0% 17.4kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 742kB ± 0% 742kB ± 0% ~ (all equal) CmdGoSize 10.7MB ± 0% 10.7MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.10MB ± 0% 1.10MB ± 0% ~ (all equal) CmdGoSize 14.9MB ± 0% 14.9MB ± 0% ~ (all equal) Change-Id: Ic89a8e62423b3d9fd9391159e0663acf450803b5 Reviewed-on: https://go-review.googlesource.com/c/go/+/198419 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
2019-10-02 11:02:41 +01:00
// predecessors and values are left unmodified.
func (b *Block) Reset(kind BlockKind) {
b.Kind = kind
b.ResetControls()
b.Aux = nil
cmd/compile: add SSA rules for s390x compare-and-branch instructions This commit adds SSA rules for the s390x combined compare-and-branch instructions. These have a shorter encoding than separate compare and branch instructions and they also don't clobber the condition code (a.k.a. flag register) reducing pressure on the flag allocator. I have deleted the 'loop_test.go' file and replaced it with a new codegen test which performs a wider range of checks. Object sizes from compilebench: name old object-bytes new object-bytes delta Template 562kB ± 0% 561kB ± 0% -0.28% (p=0.000 n=10+10) Unicode 217kB ± 0% 217kB ± 0% -0.17% (p=0.000 n=10+10) GoTypes 2.03MB ± 0% 2.02MB ± 0% -0.59% (p=0.000 n=10+10) Compiler 8.16MB ± 0% 8.11MB ± 0% -0.62% (p=0.000 n=10+10) SSA 27.4MB ± 0% 27.0MB ± 0% -1.45% (p=0.000 n=10+10) Flate 356kB ± 0% 356kB ± 0% -0.12% (p=0.000 n=10+10) GoParser 438kB ± 0% 436kB ± 0% -0.51% (p=0.000 n=10+10) Reflect 1.37MB ± 0% 1.37MB ± 0% -0.42% (p=0.000 n=10+10) Tar 485kB ± 0% 483kB ± 0% -0.39% (p=0.000 n=10+10) XML 630kB ± 0% 621kB ± 0% -1.45% (p=0.000 n=10+10) [Geo mean] 1.14MB 1.13MB -0.60% name old text-bytes new text-bytes delta HelloSize 763kB ± 0% 754kB ± 0% -1.30% (p=0.000 n=10+10) CmdGoSize 10.7MB ± 0% 10.6MB ± 0% -0.91% (p=0.000 n=10+10) [Geo mean] 2.86MB 2.82MB -1.10% Change-Id: Ibca55d9c0aa1254aee69433731ab5d26a43a7c18 Reviewed-on: https://go-review.googlesource.com/c/go/+/198037 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2019-09-17 07:29:31 -07:00
b.AuxInt = 0
cmd/compile: reduce amount of code generated for block rewrite rules Add a Reset method to blocks that allows us to reduce the amount of code we generate for block rewrite rules. Thanks to Cherry for suggesting a similar fix to this in CL 196557. Compilebench result: name old time/op new time/op delta Template 211ms ± 1% 211ms ± 1% -0.30% (p=0.028 n=19+20) Unicode 83.7ms ± 3% 83.0ms ± 2% -0.79% (p=0.029 n=18+19) GoTypes 757ms ± 1% 755ms ± 1% -0.31% (p=0.034 n=19+19) Compiler 3.51s ± 1% 3.50s ± 1% -0.20% (p=0.013 n=18+18) SSA 11.7s ± 1% 11.7s ± 1% -0.38% (p=0.000 n=19+19) Flate 131ms ± 1% 130ms ± 1% -0.32% (p=0.024 n=18+18) GoParser 162ms ± 1% 162ms ± 1% ~ (p=0.059 n=20+18) Reflect 471ms ± 0% 470ms ± 0% -0.24% (p=0.045 n=20+17) Tar 187ms ± 1% 186ms ± 1% ~ (p=0.157 n=20+20) XML 255ms ± 1% 255ms ± 1% ~ (p=0.461 n=19+20) LinkCompiler 754ms ± 2% 755ms ± 2% ~ (p=0.919 n=17+17) ExternalLinkCompiler 2.82s ±16% 2.37s ±10% -15.94% (p=0.000 n=20+20) LinkWithoutDebugCompiler 439ms ± 4% 442ms ± 6% ~ (p=0.461 n=18+19) StdCmd 25.8s ± 2% 25.5s ± 1% -0.95% (p=0.000 n=20+20) name old user-time/op new user-time/op delta Template 240ms ± 8% 238ms ± 7% ~ (p=0.301 n=20+20) Unicode 107ms ±18% 104ms ±13% ~ (p=0.149 n=20+20) GoTypes 883ms ± 3% 888ms ± 2% ~ (p=0.211 n=20+20) Compiler 4.22s ± 1% 4.20s ± 1% ~ (p=0.077 n=20+18) SSA 14.1s ± 1% 14.1s ± 2% ~ (p=0.192 n=20+20) Flate 145ms ±10% 148ms ± 5% ~ (p=0.126 n=20+18) GoParser 186ms ± 7% 186ms ± 7% ~ (p=0.779 n=20+20) Reflect 538ms ± 3% 541ms ± 3% ~ (p=0.192 n=20+20) Tar 218ms ± 4% 217ms ± 6% ~ (p=0.835 n=19+20) XML 298ms ± 5% 298ms ± 5% ~ (p=0.749 n=19+20) LinkCompiler 818ms ± 5% 825ms ± 8% ~ (p=0.461 n=20+20) ExternalLinkCompiler 1.55s ± 4% 1.53s ± 5% ~ (p=0.063 n=20+18) LinkWithoutDebugCompiler 460ms ±12% 460ms ± 7% ~ (p=0.925 n=20+20) name old object-bytes new object-bytes delta Template 554kB ± 0% 554kB ± 0% ~ (all equal) Unicode 215kB ± 0% 215kB ± 0% ~ (all equal) GoTypes 2.01MB ± 0% 2.01MB ± 0% ~ (all equal) Compiler 7.97MB ± 0% 
7.97MB ± 0% +0.00% (p=0.000 n=20+20) SSA 26.8MB ± 0% 26.9MB ± 0% +0.27% (p=0.000 n=20+20) Flate 340kB ± 0% 340kB ± 0% ~ (all equal) GoParser 434kB ± 0% 434kB ± 0% ~ (all equal) Reflect 1.34MB ± 0% 1.34MB ± 0% ~ (all equal) Tar 480kB ± 0% 480kB ± 0% ~ (all equal) XML 622kB ± 0% 622kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 20.4kB ± 0% 20.4kB ± 0% ~ (all equal) Unicode 8.21kB ± 0% 8.21kB ± 0% ~ (all equal) GoTypes 36.6kB ± 0% 36.6kB ± 0% ~ (all equal) Compiler 115kB ± 0% 115kB ± 0% +0.08% (p=0.000 n=20+20) SSA 141kB ± 0% 141kB ± 0% +0.07% (p=0.000 n=20+20) Flate 5.11kB ± 0% 5.11kB ± 0% ~ (all equal) GoParser 8.93kB ± 0% 8.93kB ± 0% ~ (all equal) Reflect 11.8kB ± 0% 11.8kB ± 0% ~ (all equal) Tar 10.9kB ± 0% 10.9kB ± 0% ~ (all equal) XML 17.4kB ± 0% 17.4kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 742kB ± 0% 742kB ± 0% ~ (all equal) CmdGoSize 10.7MB ± 0% 10.7MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.10MB ± 0% 1.10MB ± 0% ~ (all equal) CmdGoSize 14.9MB ± 0% 14.9MB ± 0% ~ (all equal) Change-Id: Ic89a8e62423b3d9fd9391159e0663acf450803b5 Reviewed-on: https://go-review.googlesource.com/c/go/+/198419 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
2019-10-02 11:02:41 +01:00
}
// resetWithControl resets b and adds control v.
// It is equivalent to b.Reset(kind); b.AddControl(v),
// except that it is one call instead of two and avoids a bounds check.
// It is intended for use by rewrite rules, where this matters.
func (b *Block) resetWithControl(kind BlockKind, v *Value) {
	b.Kind, b.Aux, b.AuxInt = kind, nil, 0
	b.ResetControls()

	// Install v as the sole control and account for the new use.
	b.Controls[0] = v
	v.Uses++
}
// resetWithControl2 resets b and adds controls v and w.
// It is equivalent to b.Reset(kind); b.AddControl(v); b.AddControl(w),
// except that it is one call instead of three and avoids two bounds checks.
// It is intended for use by rewrite rules, where this matters.
func (b *Block) resetWithControl2(kind BlockKind, v, w *Value) {
	b.Kind, b.Aux, b.AuxInt = kind, nil, 0
	b.ResetControls()

	// Install v and w as the two controls, then account for the new uses.
	b.Controls = [2]*Value{v, w}
	v.Uses++
	w.Uses++
}
// truncateValues truncates b.Values at the ith element, zeroing subsequent elements.
// The values in b.Values after i must already have had their args reset,
// to maintain correct value uses counts.
func (b *Block) truncateValues(i int) {
	kept, dropped := b.Values[:i], b.Values[i:]
	// Nil out the discarded slots so the backing array no longer refers
	// to the dropped values.
	for idx := range dropped {
		dropped[idx] = nil
	}
	b.Values = kept
}
// AddEdgeTo adds an edge from block b to block c. Used during building of the
// SSA graph; do not use on an already-completed SSA graph.
func (b *Block) AddEdgeTo(c *Block) {
	// Each end of the edge records the index of the matching entry on the
	// other end, so capture both indices before appending.
	succIdx, predIdx := len(b.Succs), len(c.Preds)
	b.Succs = append(b.Succs, Edge{c, predIdx})
	c.Preds = append(c.Preds, Edge{b, succIdx})
	b.Func.invalidateCFG()
}
// removePred removes the ith input edge from b.
// It is the responsibility of the caller to remove
// the corresponding successor edge, and adjust any
// phi values by calling b.removePhiArg(v, i).
func (b *Block) removePred(i int) {
	last := len(b.Preds) - 1
	if i != last {
		// Fill the hole with the final edge, then fix up the index stored
		// at the other end of the edge that was moved.
		moved := b.Preds[last]
		b.Preds[i] = moved
		moved.b.Succs[moved.i].i = i
	}
	// Zero the vacated slot before shrinking the slice.
	b.Preds[last] = Edge{}
	b.Preds = b.Preds[:last]
	b.Func.invalidateCFG()
}
// removeSucc removes the ith output edge from b.
// It is the responsibility of the caller to remove
// the corresponding predecessor edge.
func (b *Block) removeSucc(i int) {
	last := len(b.Succs) - 1
	if i != last {
		// Fill the hole with the final edge, then fix up the index stored
		// at the other end of the edge that was moved.
		moved := b.Succs[last]
		b.Succs[i] = moved
		moved.b.Preds[moved.i].i = i
	}
	// Zero the vacated slot before shrinking the slice.
	b.Succs[last] = Edge{}
	b.Succs = b.Succs[:last]
	b.Func.invalidateCFG()
}
// swapSuccessors exchanges the two successor edges of b, updates the
// successor index recorded in each target's predecessor entry, and negates
// b.Likely to match. It reports a fatal error unless b has exactly two
// successors.
func (b *Block) swapSuccessors() {
	if n := len(b.Succs); n != 2 {
		b.Fatalf("swapSuccessors with len(Succs)=%d", n)
	}
	first, second := b.Succs[0], b.Succs[1]
	b.Succs[0], b.Succs[1] = second, first
	// The old first edge now sits at index 1 and the old second at index 0.
	first.b.Preds[first.i].i = 1
	second.b.Preds[second.i].i = 0
	b.Likely = -b.Likely
}
// removePhiArg removes the ith arg from phi.
// It must be called after calling b.removePred(i) to
// adjust the corresponding phi value of the block:
//
//	b.removePred(i)
//	for _, v := range b.Values {
//		if v.Op != OpPhi {
//			continue
//		}
//		b.removePhiArg(v, i)
//	}
func (b *Block) removePhiArg(phi *Value, i int) {
	last := len(b.Preds)
	// The predecessor has already been removed, so phi must still carry
	// exactly one more argument than b has predecessors.
	if numPhiArgs := len(phi.Args); numPhiArgs != last+1 {
		b.Fatalf("inconsistent state, num predecessors: %d, num phi args: %d", last, numPhiArgs)
	}
	phi.Args[i].Uses--
	// Move the final argument into the hole, mirroring removePred.
	phi.Args[i] = phi.Args[last]
	phi.Args[last] = nil
	phi.Args = phi.Args[:last]
	phielimValue(phi)
}
// LackingPos indicates whether b is a block whose position should be inherited
// from its successors. This is true if all the values within it have unreliable positions
// and if it is "plain", meaning that there is no control flow that is also very likely
// to correspond to a well-understood source position.
func (b *Block) LackingPos() bool {
	// Non-plain predecessors are If or Defer, which both (1) have two successors,
	// which might have different line numbers and (2) correspond to statements
	// in the source code that have positions, so this case ought not occur anyway.
	// A block that already carries a position is likewise not lacking one.
	if b.Kind != BlockPlain || b.Pos != src.NoXPos {
		return false
	}
	// A single value with a reliable position is enough to disqualify b.
	for _, v := range b.Values {
		if !v.LackingPos() {
			return false
		}
	}
	return true
}
// AuxIntString returns b.AuxInt formatted according to the aux int type
// reported by b.Kind.AuxIntType(). It returns the empty string when the
// kind has no aux int type.
func (b *Block) AuxIntString() string {
	switch typ := b.Kind.AuxIntType(); typ {
	case "":
		// no aux int type
		return ""
	case "int8":
		return fmt.Sprintf("%v", int8(b.AuxInt))
	case "uint8":
		return fmt.Sprintf("%v", uint8(b.AuxInt))
	}
	// type specified but not implemented - print as int64
	return fmt.Sprintf("%v", b.AuxInt)
}
cmd/compile: use depth first topological sort algorithm for layout The current layout algorithm tries to put consecutive blocks together, so the priority of the successor block is higher than the priority of the zero indegree block. This algorithm is beneficial for subsequent register allocation, but will result in more branch instructions. The depth-first topological sorting algorithm is a well-known layout algorithm, which has applications in many languages, and it helps to reduce branch instructions. This CL applies it to the layout pass. The test results show that it helps to reduce the code size. This CL also includes the following changes: 1, Removed the primary predecessor mechanism. The new layout algorithm is not very friendly to register allocator in some cases, in order to adapt to the new layout algorithm, a new primary predecessor selection strategy is introduced. 2, Since the new layout implementation may place non-loop blocks between loop blocks, some adaptive modifications have also been made to looprotate pass. 3, The layout also affects the results of codegen, so this CL also adjusted several codegen tests accordingly. It is inevitable that this CL will cause the code size or performance of a few functions to decrease, but the number of cases it improves is much larger than the number of cases it drops. 
Statistical data from compilecmp on linux/amd64 is as follow: name old time/op new time/op delta Template 382ms ± 4% 382ms ± 4% ~ (p=0.497 n=49+50) Unicode 170ms ± 9% 169ms ± 8% ~ (p=0.344 n=48+50) GoTypes 2.01s ± 4% 2.01s ± 4% ~ (p=0.628 n=50+48) Compiler 190ms ±10% 189ms ± 9% ~ (p=0.734 n=50+50) SSA 11.8s ± 2% 11.8s ± 3% ~ (p=0.877 n=50+50) Flate 241ms ± 9% 241ms ± 8% ~ (p=0.897 n=50+49) GoParser 366ms ± 3% 361ms ± 4% -1.21% (p=0.004 n=47+50) Reflect 835ms ± 3% 838ms ± 3% ~ (p=0.275 n=50+49) Tar 336ms ± 4% 335ms ± 3% ~ (p=0.454 n=48+48) XML 433ms ± 4% 431ms ± 3% ~ (p=0.071 n=49+48) LinkCompiler 706ms ± 4% 705ms ± 4% ~ (p=0.608 n=50+49) ExternalLinkCompiler 1.85s ± 3% 1.83s ± 2% -1.47% (p=0.000 n=49+48) LinkWithoutDebugCompiler 437ms ± 5% 437ms ± 6% ~ (p=0.953 n=49+50) [Geo mean] 615ms 613ms -0.37% name old alloc/op new alloc/op delta Template 38.7MB ± 1% 38.7MB ± 1% ~ (p=0.834 n=50+50) Unicode 28.1MB ± 0% 28.1MB ± 0% -0.22% (p=0.000 n=49+50) GoTypes 168MB ± 1% 168MB ± 1% ~ (p=0.054 n=47+47) Compiler 23.0MB ± 1% 23.0MB ± 1% ~ (p=0.432 n=50+50) SSA 1.54GB ± 0% 1.54GB ± 0% +0.21% (p=0.000 n=50+50) Flate 23.6MB ± 1% 23.6MB ± 1% ~ (p=0.153 n=43+46) GoParser 35.1MB ± 1% 35.1MB ± 2% ~ (p=0.202 n=50+50) Reflect 84.7MB ± 1% 84.7MB ± 1% ~ (p=0.333 n=48+49) Tar 34.5MB ± 1% 34.5MB ± 1% ~ (p=0.406 n=46+49) XML 44.3MB ± 2% 44.2MB ± 3% ~ (p=0.981 n=50+50) LinkCompiler 131MB ± 0% 128MB ± 0% -2.74% (p=0.000 n=50+50) ExternalLinkCompiler 120MB ± 0% 120MB ± 0% +0.01% (p=0.007 n=50+50) LinkWithoutDebugCompiler 77.3MB ± 0% 77.3MB ± 0% -0.02% (p=0.000 n=50+50) [Geo mean] 69.3MB 69.1MB -0.22% file before after Δ % addr2line 4104220 4043684 -60536 -1.475% api 5342502 5249678 -92824 -1.737% asm 4973785 4858257 -115528 -2.323% buildid 2667844 2625660 -42184 -1.581% cgo 4686849 4616313 -70536 -1.505% compile 23667431 23268406 -399025 -1.686% cover 4959676 4874108 -85568 -1.725% dist 3515934 3450422 -65512 -1.863% doc 3995581 3925469 -70112 -1.755% fix 3379202 3318522 -60680 -1.796% link 
6743249 6629913 -113336 -1.681% nm 4047529 3991777 -55752 -1.377% objdump 4456151 4388151 -68000 -1.526% pack 2435040 2398072 -36968 -1.518% pprof 13804080 13565808 -238272 -1.726% test2json 2690043 2645987 -44056 -1.638% trace 10418492 10232716 -185776 -1.783% vet 7258259 7121259 -137000 -1.888% total 113145867 111204202 -1941665 -1.716% The situation on linux/arm64 is as follow: name old time/op new time/op delta Template 280ms ± 1% 282ms ± 1% +0.75% (p=0.000 n=46+48) Unicode 124ms ± 2% 124ms ± 2% +0.37% (p=0.045 n=50+50) GoTypes 1.69s ± 1% 1.70s ± 1% +0.56% (p=0.000 n=49+50) Compiler 122ms ± 1% 123ms ± 1% +0.93% (p=0.000 n=50+50) SSA 12.6s ± 1% 12.7s ± 0% +0.72% (p=0.000 n=50+50) Flate 170ms ± 1% 172ms ± 1% +0.97% (p=0.000 n=49+49) GoParser 262ms ± 1% 263ms ± 1% +0.39% (p=0.000 n=49+48) Reflect 639ms ± 1% 650ms ± 1% +1.63% (p=0.000 n=49+49) Tar 243ms ± 1% 245ms ± 1% +0.82% (p=0.000 n=50+50) XML 324ms ± 1% 327ms ± 1% +0.72% (p=0.000 n=50+49) LinkCompiler 597ms ± 1% 596ms ± 1% -0.27% (p=0.001 n=48+47) ExternalLinkCompiler 1.90s ± 1% 1.88s ± 1% -1.00% (p=0.000 n=50+50) LinkWithoutDebugCompiler 364ms ± 1% 363ms ± 1% ~ (p=0.220 n=49+50) [Geo mean] 485ms 488ms +0.49% name old alloc/op new alloc/op delta Template 38.7MB ± 0% 38.8MB ± 1% ~ (p=0.093 n=43+49) Unicode 28.4MB ± 0% 28.4MB ± 0% +0.03% (p=0.000 n=49+45) GoTypes 169MB ± 1% 169MB ± 1% +0.23% (p=0.010 n=50+50) Compiler 23.2MB ± 1% 23.2MB ± 1% +0.11% (p=0.000 n=40+44) SSA 1.54GB ± 0% 1.55GB ± 0% +0.45% (p=0.000 n=47+49) Flate 23.8MB ± 2% 23.8MB ± 1% ~ (p=0.543 n=50+50) GoParser 35.3MB ± 1% 35.4MB ± 1% ~ (p=0.792 n=50+50) Reflect 85.2MB ± 1% 85.2MB ± 0% ~ (p=0.055 n=50+47) Tar 34.5MB ± 1% 34.5MB ± 1% +0.06% (p=0.015 n=50+50) XML 43.8MB ± 2% 43.9MB ± 2% +0.19% (p=0.000 n=48+48) LinkCompiler 137MB ± 0% 136MB ± 0% -0.92% (p=0.000 n=50+50) ExternalLinkCompiler 127MB ± 0% 127MB ± 0% ~ (p=0.516 n=50+50) LinkWithoutDebugCompiler 84.0MB ± 0% 84.0MB ± 0% ~ (p=0.057 n=50+50) [Geo mean] 70.4MB 70.4MB +0.01% file before after 
Δ % addr2line 4021557 4002933 -18624 -0.463% api 5127847 5028503 -99344 -1.937% asm 5034716 4936836 -97880 -1.944% buildid 2608118 2594094 -14024 -0.538% cgo 4488592 4398320 -90272 -2.011% compile 22501129 22213592 -287537 -1.278% cover 4742301 4713573 -28728 -0.606% dist 3388071 3365311 -22760 -0.672% doc 3802250 3776082 -26168 -0.688% fix 3306147 3216939 -89208 -2.698% link 6404483 6363699 -40784 -0.637% nm 3941026 3921930 -19096 -0.485% objdump 4383330 4295122 -88208 -2.012% pack 2404547 2389515 -15032 -0.625% pprof 12996234 12856818 -139416 -1.073% test2json 2668500 2586788 -81712 -3.062% trace 9816276 9609580 -206696 -2.106% vet 6900682 6787338 -113344 -1.643% total 108535806 107056973 -1478833 -1.363% Change-Id: Iaec1cdcaacca8025e9babb0fb8a532fddb70c87d Reviewed-on: https://go-review.googlesource.com/c/go/+/255239 Reviewed-by: eric fang <eric.fang@arm.com> Reviewed-by: Keith Randall <khr@golang.org> Trust: eric fang <eric.fang@arm.com>
2020-07-23 10:24:56 +08:00
// likelyBranch reports whether block b is the likely branch of all of its predecessors.
// A block without predecessors is never considered likely.
func (b *Block) likelyBranch() bool {
	if len(b.Preds) == 0 {
		return false
	}
	for _, e := range b.Preds {
		pred := e.b
		switch len(pred.Succs) {
		case 1:
			// b is the only way out of pred.
			continue
		case 2:
			viaFirst := pred.Likely == BranchLikely && pred.Succs[0].b == b
			if viaFirst || pred.Likely == BranchUnlikely && pred.Succs[1].b == b {
				continue
			}
		}
		return false
	}
	return true
}
// Logf forwards msg and args to the Logf method of the function owning b.
func (b *Block) Logf(msg string, args ...interface{}) {
	b.Func.Logf(msg, args...)
}
// Log returns the result of the Log method of the function owning b.
func (b *Block) Log() bool {
	return b.Func.Log()
}
// Fatalf forwards msg and args to the Fatalf method of the function owning b.
func (b *Block) Fatalf(msg string, args ...interface{}) {
	b.Func.Fatalf(msg, args...)
}
// BranchPrediction records which successor of a two-way block is expected
// to be taken. The likely and unlikely values are negations of each other,
// so a prediction can be flipped by negating it.
type BranchPrediction int8

const (
	// BranchUnlikely means Succs[1] is the most likely branch taken.
	BranchUnlikely BranchPrediction = -1
	// BranchUnknown means no prediction is available; it is the zero value.
	BranchUnknown BranchPrediction = 0
	// BranchLikely means Succs[0] is the most likely branch taken.
	BranchLikely BranchPrediction = +1
)