2015-02-13 14:40:36 -05:00
|
|
|
// Copyright 2013 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
2015-03-05 13:57:36 -05:00
|
|
|
// Garbage collector liveness bitmap generation.
|
|
|
|
|
|
|
|
|
|
// The command line flag -live causes this code to print debug information.
|
|
|
|
|
// The levels are:
|
|
|
|
|
//
|
|
|
|
|
// -live (aka -live=1): print liveness lists as code warnings at safe points
|
|
|
|
|
// -live=2: print an assembly listing with liveness annotations
|
|
|
|
|
// -live=3: print information during each computation phase (much chattier)
|
|
|
|
|
//
|
|
|
|
|
// Each level includes the earlier output as well.
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"cmd/internal/obj"
|
2016-04-06 12:01:40 -07:00
|
|
|
"cmd/internal/sys"
|
2016-03-28 22:27:36 +13:00
|
|
|
"crypto/md5"
|
2015-02-13 14:40:36 -05:00
|
|
|
"fmt"
|
|
|
|
|
"sort"
|
2015-10-28 10:40:47 -07:00
|
|
|
"strings"
|
2015-02-13 14:40:36 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Values stored in BasicBlock.mark while traversing the control flow graph.
const (
	UNVISITED = 0 // the traversal has not reached the block yet
	VISITED   = 1 // the traversal has already reached the block
)
|
|
|
|
|
|
|
|
|
|
// An ordinary basic block.
|
|
|
|
|
//
|
2016-03-01 23:21:55 +00:00
|
|
|
// Instructions are threaded together in a doubly-linked list. To iterate in
|
2015-02-13 14:40:36 -05:00
|
|
|
// program order follow the link pointer from the first node and stop after the
|
|
|
|
|
// last node has been visited
|
|
|
|
|
//
|
2016-03-13 10:23:18 +09:00
|
|
|
// for p = bb.first; ; p = p.link {
|
2015-02-13 14:40:36 -05:00
|
|
|
// ...
|
2016-03-13 10:23:18 +09:00
|
|
|
// if p == bb.last {
|
|
|
|
|
// break
|
|
|
|
|
// }
|
2015-02-13 14:40:36 -05:00
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// To iterate in reverse program order by following the opt pointer from the
|
|
|
|
|
// last node
|
|
|
|
|
//
|
2016-03-13 10:23:18 +09:00
|
|
|
// for p = bb.last; p != nil; p = p.opt {
|
2015-02-13 14:40:36 -05:00
|
|
|
// ...
|
|
|
|
|
// }
|
|
|
|
|
type BasicBlock struct {
|
2015-03-05 13:57:36 -05:00
|
|
|
pred []*BasicBlock // predecessors; if none, probably start of CFG
|
|
|
|
|
succ []*BasicBlock // successors; if none, probably ends in return statement
|
|
|
|
|
first *obj.Prog // first instruction in block
|
|
|
|
|
last *obj.Prog // last instruction in block
|
|
|
|
|
rpo int // reverse post-order number (also index in cfg)
|
|
|
|
|
mark int // mark bit for traversals
|
|
|
|
|
lastbitmapindex int // for livenessepilogue
|
|
|
|
|
|
|
|
|
|
// Summary sets of block effects.
|
|
|
|
|
|
|
|
|
|
// Computed during livenessprologue using only the content of
|
|
|
|
|
// individual blocks:
|
|
|
|
|
//
|
|
|
|
|
// uevar: upward exposed variables (used before set in block)
|
|
|
|
|
// varkill: killed variables (set in block)
|
|
|
|
|
// avarinit: addrtaken variables set or used (proof of initialization)
|
2016-04-29 14:17:04 +10:00
|
|
|
uevar bvec
|
|
|
|
|
varkill bvec
|
|
|
|
|
avarinit bvec
|
2015-03-05 13:57:36 -05:00
|
|
|
|
|
|
|
|
// Computed during livenesssolve using control flow information:
|
|
|
|
|
//
|
|
|
|
|
// livein: variables live at block entry
|
|
|
|
|
// liveout: variables live at block exit
|
|
|
|
|
// avarinitany: addrtaken variables possibly initialized at block exit
|
|
|
|
|
// (initialized in block or at exit from any predecessor block)
|
|
|
|
|
// avarinitall: addrtaken variables certainly initialized at block exit
|
|
|
|
|
// (initialized in block or at exit from all predecessor blocks)
|
2016-04-29 14:17:04 +10:00
|
|
|
livein bvec
|
|
|
|
|
liveout bvec
|
|
|
|
|
avarinitany bvec
|
|
|
|
|
avarinitall bvec
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A collection of global state used by liveness analysis.
|
|
|
|
|
type Liveness struct {
|
2015-03-05 13:57:36 -05:00
|
|
|
fn *Node
|
|
|
|
|
ptxt *obj.Prog
|
|
|
|
|
vars []*Node
|
|
|
|
|
cfg []*BasicBlock
|
|
|
|
|
|
|
|
|
|
// An array with a bit vector for each safe point tracking live pointers
|
|
|
|
|
// in the arguments and locals area, indexed by bb.rpo.
|
2016-04-29 14:17:04 +10:00
|
|
|
argslivepointers []bvec
|
|
|
|
|
livepointers []bvec
|
2017-01-14 23:43:26 -08:00
|
|
|
|
|
|
|
|
cache progeffectscache
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// progeffectscache holds the instruction effects that do not depend on
// the individual instruction, so that progeffects can hand them out
// without recomputing them. All values are indexes into Liveness.vars.
type progeffectscache struct {
	// Filled in once by initcache:
	//
	//	tailuevar: every PPARAM variable (used by a tail call)
	//	retuevar: every PPARAMOUT variable whose address is not taken
	//	textvarkill: every PPARAM variable (killed by the TEXT instruction)
	//	textavarinit: every addrtaken PPARAM variable
	tailuevar, retuevar, textvarkill, textavarinit []int32

	// Fixed-size backing storage that progeffects reslices to build
	// the results for ordinary instructions.
	uevar, varkill, avarinit [3]int32

	// Set by initcache; progeffects refuses to run while it is false.
	initialized bool
}
|
|
|
|
|
|
2016-09-15 01:42:58 -07:00
|
|
|
// ProgInfo holds information about the instruction for use
// by clients such as the compiler. The exact meaning of this
// data is up to the client and is not interpreted by the
// cmd/internal/obj/... packages.
type ProgInfo struct {
	_     struct{} // prevents unkeyed literals; declared first because a trailing zero-sized field would take space
	Flags uint32   // flag bits
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Constructs a new basic block containing a single instruction.
|
|
|
|
|
func newblock(prog *obj.Prog) *BasicBlock {
|
|
|
|
|
if prog == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("newblock: prog cannot be nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-27 19:53:50 -07:00
|
|
|
// type block allows us to allocate a BasicBlock
|
|
|
|
|
// and its pred/succ slice together.
|
|
|
|
|
type block struct {
|
|
|
|
|
result BasicBlock
|
|
|
|
|
pred [2]*BasicBlock
|
|
|
|
|
succ [2]*BasicBlock
|
|
|
|
|
}
|
|
|
|
|
b := new(block)
|
|
|
|
|
|
|
|
|
|
result := &b.result
|
2015-02-13 14:40:36 -05:00
|
|
|
result.rpo = -1
|
|
|
|
|
result.mark = UNVISITED
|
|
|
|
|
result.first = prog
|
|
|
|
|
result.last = prog
|
2016-10-27 19:53:50 -07:00
|
|
|
result.pred = b.pred[:0]
|
|
|
|
|
result.succ = b.succ[:0]
|
2015-02-13 14:40:36 -05:00
|
|
|
return result
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Adds an edge between two basic blocks by making from a predecessor of to and
|
|
|
|
|
// to a successor of from.
|
|
|
|
|
func addedge(from *BasicBlock, to *BasicBlock) {
|
|
|
|
|
if from == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("addedge: from is nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if to == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("addedge: to is nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
from.succ = append(from.succ, to)
|
|
|
|
|
to.pred = append(to.pred, from)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Inserts prev before curr in the instruction
|
2016-03-01 23:21:55 +00:00
|
|
|
// stream. Any control flow, such as branches or fall-throughs, that target the
|
2015-02-13 14:40:36 -05:00
|
|
|
// existing instruction are adjusted to target the new instruction.
|
|
|
|
|
func splicebefore(lv *Liveness, bb *BasicBlock, prev *obj.Prog, curr *obj.Prog) {
|
|
|
|
|
// There may be other instructions pointing at curr,
|
|
|
|
|
// and we want them to now point at prev. Instead of
|
|
|
|
|
// trying to find all such instructions, swap the contents
|
|
|
|
|
// so that the problem becomes inserting next after curr.
|
|
|
|
|
// The "opt" field is the backward link in the linked list.
|
|
|
|
|
|
|
|
|
|
// Overwrite curr's data with prev, but keep the list links.
|
2015-02-23 16:07:24 -05:00
|
|
|
tmp := *curr
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
*curr = *prev
|
|
|
|
|
curr.Opt = tmp.Opt
|
|
|
|
|
curr.Link = tmp.Link
|
|
|
|
|
|
|
|
|
|
// Overwrite prev (now next) with curr's old data.
|
2015-02-23 16:07:24 -05:00
|
|
|
next := prev
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
*next = tmp
|
|
|
|
|
next.Opt = nil
|
|
|
|
|
next.Link = nil
|
|
|
|
|
|
|
|
|
|
// Now insert next after curr.
|
|
|
|
|
next.Link = curr.Link
|
|
|
|
|
|
|
|
|
|
next.Opt = curr
|
|
|
|
|
curr.Link = next
|
|
|
|
|
if next.Link != nil && next.Link.Opt == curr {
|
|
|
|
|
next.Link.Opt = next
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if bb.last == curr {
|
|
|
|
|
bb.last = next
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A pretty printer for basic blocks.
|
|
|
|
|
func printblock(bb *BasicBlock) {
|
|
|
|
|
fmt.Printf("basic block %d\n", bb.rpo)
|
|
|
|
|
fmt.Printf("\tpred:")
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, pred := range bb.pred {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf(" %d", pred.rpo)
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
fmt.Printf("\tsucc:")
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, succ := range bb.succ {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf(" %d", succ.rpo)
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
fmt.Printf("\tprog:\n")
|
2015-02-23 16:07:24 -05:00
|
|
|
for prog := bb.first; ; prog = prog.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\t\t%v\n", prog)
|
|
|
|
|
if prog == bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Iterates over a basic block applying a callback to each instruction. There
|
|
|
|
|
// are two criteria for termination. If the end of basic block is reached a
|
|
|
|
|
// value of zero is returned. If the callback returns a non-zero value, the
|
2015-02-13 14:40:36 -05:00
|
|
|
// iteration is stopped and the value of the callback is returned.
|
2015-02-17 22:13:49 -05:00
|
|
|
func blockany(bb *BasicBlock, f func(*obj.Prog) bool) bool {
|
|
|
|
|
for p := bb.last; p != nil; p = p.Opt.(*obj.Prog) {
|
|
|
|
|
if f(p) {
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
// livenessShouldTrack reports whether the liveness analysis
|
|
|
|
|
// should track the variable n.
|
|
|
|
|
// We don't care about variables that have no pointers,
|
|
|
|
|
// nor do we care about non-local variables,
|
|
|
|
|
// nor do we care about empty structs (handled by the pointer check),
|
|
|
|
|
// nor do we care about the fake PAUTOHEAP variables.
|
|
|
|
|
func livenessShouldTrack(n *Node) bool {
|
|
|
|
|
return n.Op == ONAME && (n.Class == PAUTO || n.Class == PPARAM || n.Class == PPARAMOUT) && haspointers(n.Type)
|
|
|
|
|
}
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns.
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
// getvariables returns the list of on-stack variables that we need to track.
|
|
|
|
|
func getvariables(fn *Node) []*Node {
|
|
|
|
|
var vars []*Node
|
|
|
|
|
for _, n := range fn.Func.Dcl {
|
|
|
|
|
if n.Op == ONAME {
|
2015-02-13 14:40:36 -05:00
|
|
|
// The Node.opt field is available for use by optimization passes.
|
2016-05-25 10:01:58 -04:00
|
|
|
// We use it to hold the index of the node in the variables array
|
|
|
|
|
// (nil means the Node is not in the variables array).
|
2015-02-13 14:40:36 -05:00
|
|
|
// The Node.curfn field is supposed to be set to the current function
|
|
|
|
|
// already, but for some compiler-introduced names it seems not to be,
|
|
|
|
|
// so fix that here.
|
|
|
|
|
// Later, when we want to find the index of a node in the variables list,
|
2016-05-25 10:01:58 -04:00
|
|
|
// we will check that n.Curfn == Curfn and n.Opt() != nil. Then n.Opt().(int32)
|
2015-02-13 14:40:36 -05:00
|
|
|
// is the index in the variables list.
|
2016-05-25 10:01:58 -04:00
|
|
|
n.SetOpt(nil)
|
|
|
|
|
n.Name.Curfn = Curfn
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
if livenessShouldTrack(n) {
|
|
|
|
|
n.SetOpt(int32(len(vars)))
|
|
|
|
|
vars = append(vars, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
return vars
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// A pretty printer for control flow graphs. Takes a slice of *BasicBlocks.
|
2015-02-13 14:40:36 -05:00
|
|
|
func printcfg(cfg []*BasicBlock) {
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
printblock(bb)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Assigns a reverse post order number to each connected basic block using the
|
2016-03-01 23:21:55 +00:00
|
|
|
// standard algorithm. Unconnected blocks will not be affected.
|
2015-02-13 14:40:36 -05:00
|
|
|
func reversepostorder(root *BasicBlock, rpo *int32) {
|
|
|
|
|
root.mark = VISITED
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range root.succ {
|
2015-02-13 14:40:36 -05:00
|
|
|
if bb.mark == UNVISITED {
|
|
|
|
|
reversepostorder(bb, rpo)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*rpo -= 1
|
|
|
|
|
root.rpo = int(*rpo)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Comparison predicate used for sorting basic blocks by their rpo in ascending
|
|
|
|
|
// order.
|
|
|
|
|
type blockrpocmp []*BasicBlock
|
|
|
|
|
|
|
|
|
|
func (x blockrpocmp) Len() int { return len(x) }
|
|
|
|
|
func (x blockrpocmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
|
|
|
|
func (x blockrpocmp) Less(i, j int) bool { return x[i].rpo < x[j].rpo }
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// A pattern matcher for call instructions. Returns true when the instruction
|
2015-02-13 14:40:36 -05:00
|
|
|
// is a call to a specific package qualified function name.
|
|
|
|
|
func iscall(prog *obj.Prog, name *obj.LSym) bool {
|
|
|
|
|
if prog == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("iscall: prog is nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if name == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("iscall: function name is nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if prog.As != obj.ACALL {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return name == prog.To.Sym
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true for instructions that call a runtime function implementing a
|
|
|
|
|
// select communication clause.
|
|
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
var selectNames [4]*obj.LSym
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func isselectcommcasecall(prog *obj.Prog) bool {
|
2015-03-02 21:25:33 -05:00
|
|
|
if selectNames[0] == nil {
|
|
|
|
|
selectNames[0] = Linksym(Pkglookup("selectsend", Runtimepkg))
|
|
|
|
|
selectNames[1] = Linksym(Pkglookup("selectrecv", Runtimepkg))
|
|
|
|
|
selectNames[2] = Linksym(Pkglookup("selectrecv2", Runtimepkg))
|
|
|
|
|
selectNames[3] = Linksym(Pkglookup("selectdefault", Runtimepkg))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, name := range selectNames {
|
|
|
|
|
if iscall(prog, name) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true for call instructions that target runtime·newselect.
|
|
|
|
|
|
|
|
|
|
var isnewselect_sym *obj.LSym
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func isnewselect(prog *obj.Prog) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if isnewselect_sym == nil {
|
|
|
|
|
isnewselect_sym = Linksym(Pkglookup("newselect", Runtimepkg))
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return iscall(prog, isnewselect_sym)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true for call instructions that target runtime·selectgo.
|
|
|
|
|
|
|
|
|
|
var isselectgocall_sym *obj.LSym
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func isselectgocall(prog *obj.Prog) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if isselectgocall_sym == nil {
|
|
|
|
|
isselectgocall_sym = Linksym(Pkglookup("selectgo", Runtimepkg))
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return iscall(prog, isselectgocall_sym)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var isdeferreturn_sym *obj.LSym
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func isdeferreturn(prog *obj.Prog) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if isdeferreturn_sym == nil {
|
|
|
|
|
isdeferreturn_sym = Linksym(Pkglookup("deferreturn", Runtimepkg))
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return iscall(prog, isdeferreturn_sym)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Walk backwards from a runtime·selectgo call up to its immediately dominating
|
2016-03-01 23:21:55 +00:00
|
|
|
// runtime·newselect call. Any successor nodes of communication clause nodes
|
|
|
|
|
// are implicit successors of the runtime·selectgo call node. The goal of this
|
2015-02-13 14:40:36 -05:00
|
|
|
// analysis is to add these missing edges to complete the control flow graph.
|
|
|
|
|
func addselectgosucc(selectgo *BasicBlock) {
|
2015-02-23 16:07:24 -05:00
|
|
|
pred := selectgo
|
2015-02-13 14:40:36 -05:00
|
|
|
for {
|
|
|
|
|
if len(pred.pred) == 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("selectgo does not have a newselect")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
pred = pred.pred[0]
|
2015-02-17 22:13:49 -05:00
|
|
|
if blockany(pred, isselectcommcasecall) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// A select comm case block should have exactly one
|
|
|
|
|
// successor.
|
|
|
|
|
if len(pred.succ) != 1 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("select comm case has too many successors")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
succ := pred.succ[0]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Its successor should have exactly two successors.
|
|
|
|
|
// The drop through should flow to the selectgo block
|
|
|
|
|
// and the branch should lead to the select case
|
|
|
|
|
// statements block.
|
|
|
|
|
if len(succ.succ) != 2 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("select comm case successor has too many successors")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add the block as a successor of the selectgo block.
|
|
|
|
|
addedge(selectgo, succ)
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if blockany(pred, isnewselect) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Reached the matching newselect.
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// The entry point for the missing selectgo control flow algorithm. Takes a
|
|
|
|
|
// slice of *BasicBlocks containing selectgo calls.
|
2015-02-13 14:40:36 -05:00
|
|
|
func fixselectgo(selectgo []*BasicBlock) {
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range selectgo {
|
2015-02-13 14:40:36 -05:00
|
|
|
addselectgosucc(bb)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Constructs a control flow graph from a sequence of instructions. This
|
2015-02-13 14:40:36 -05:00
|
|
|
// procedure is complicated by various sources of implicit control flow that are
|
2016-03-13 10:23:18 +09:00
|
|
|
// not accounted for using the standard cfg construction algorithm. Returns a
|
|
|
|
|
// slice of *BasicBlocks in control flow graph form (basic blocks ordered by
|
2015-02-13 14:40:36 -05:00
|
|
|
// their RPO number).
|
|
|
|
|
func newcfg(firstp *obj.Prog) []*BasicBlock {
|
2016-03-01 23:21:55 +00:00
|
|
|
// Reset the opt field of each prog to nil. In the first and second
|
2015-02-13 14:40:36 -05:00
|
|
|
// passes, instructions that are labels temporarily use the opt field to
|
2016-03-01 23:21:55 +00:00
|
|
|
// point to their basic block. In the third pass, the opt field reset
|
2015-02-13 14:40:36 -05:00
|
|
|
// to point to the predecessor of an instruction in its basic block.
|
2015-02-23 16:07:24 -05:00
|
|
|
for p := firstp; p != nil; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
p.Opt = nil
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// Allocate a slice to remember where we have seen selectgo calls.
|
2015-02-13 14:40:36 -05:00
|
|
|
// These blocks will be revisited to add successor control flow edges.
|
2016-03-13 10:23:18 +09:00
|
|
|
var selectgo []*BasicBlock
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Loop through all instructions identifying branch targets
|
|
|
|
|
// and fall-throughs and allocate basic blocks.
|
2016-03-13 10:23:18 +09:00
|
|
|
var cfg []*BasicBlock
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
bb := newblock(firstp)
|
2015-02-13 14:40:36 -05:00
|
|
|
cfg = append(cfg, bb)
|
2015-09-08 16:04:37 -07:00
|
|
|
for p := firstp; p != nil && p.As != obj.AEND; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
if p.To.Type == obj.TYPE_BRANCH {
|
2015-03-16 15:54:44 -04:00
|
|
|
if p.To.Val == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("prog branch to nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-03-16 15:54:44 -04:00
|
|
|
if p.To.Val.(*obj.Prog).Opt == nil {
|
|
|
|
|
p.To.Val.(*obj.Prog).Opt = newblock(p.To.Val.(*obj.Prog))
|
|
|
|
|
cfg = append(cfg, p.To.Val.(*obj.Prog).Opt.(*BasicBlock))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if p.As != obj.AJMP && p.Link != nil && p.Link.Opt == nil {
|
|
|
|
|
p.Link.Opt = newblock(p.Link)
|
|
|
|
|
cfg = append(cfg, p.Link.Opt.(*BasicBlock))
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
} else if isselectcommcasecall(p) || isselectgocall(p) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Accommodate implicit selectgo control flow.
|
|
|
|
|
if p.Link.Opt == nil {
|
|
|
|
|
p.Link.Opt = newblock(p.Link)
|
|
|
|
|
cfg = append(cfg, p.Link.Opt.(*BasicBlock))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Loop through all basic blocks maximally growing the list of
|
2016-03-01 23:21:55 +00:00
|
|
|
// contained instructions until a label is reached. Add edges
|
2015-02-13 14:40:36 -05:00
|
|
|
// for branches and fall-through instructions.
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range cfg {
|
2015-09-08 16:04:37 -07:00
|
|
|
for p := bb.last; p != nil && p.As != obj.AEND; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
if p.Opt != nil && p != bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
bb.last = p
|
|
|
|
|
|
|
|
|
|
// Stop before an unreachable RET, to avoid creating
|
|
|
|
|
// unreachable control flow nodes.
|
|
|
|
|
if p.Link != nil && p.Link.As == obj.ARET && p.Link.Mode == 1 {
|
2016-03-01 23:21:55 +00:00
|
|
|
// TODO: remove after SSA is done. SSA does not
|
2015-09-08 16:04:37 -07:00
|
|
|
// generate any unreachable RET instructions.
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Collect basic blocks with selectgo calls.
|
2015-02-17 22:13:49 -05:00
|
|
|
if isselectgocall(p) {
|
2015-02-13 14:40:36 -05:00
|
|
|
selectgo = append(selectgo, bb)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if bb.last.To.Type == obj.TYPE_BRANCH {
|
2015-03-16 15:54:44 -04:00
|
|
|
addedge(bb, bb.last.To.Val.(*obj.Prog).Opt.(*BasicBlock))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if bb.last.Link != nil {
|
|
|
|
|
// Add a fall-through when the instruction is
|
|
|
|
|
// not an unconditional control transfer.
|
|
|
|
|
if bb.last.As != obj.AJMP && bb.last.As != obj.ARET && bb.last.As != obj.AUNDEF {
|
|
|
|
|
addedge(bb, bb.last.Link.Opt.(*BasicBlock))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add back links so the instructions in a basic block can be traversed
|
2016-03-01 23:21:55 +00:00
|
|
|
// backward. This is the final state of the instruction opt field.
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range cfg {
|
|
|
|
|
p := bb.first
|
|
|
|
|
var prev *obj.Prog
|
2015-02-13 14:40:36 -05:00
|
|
|
for {
|
|
|
|
|
p.Opt = prev
|
|
|
|
|
if p == bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
prev = p
|
|
|
|
|
p = p.Link
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add missing successor edges to the selectgo blocks.
|
|
|
|
|
if len(selectgo) != 0 {
|
2016-03-03 15:49:04 -08:00
|
|
|
fixselectgo(selectgo)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Find a depth-first order and assign a depth-first number to
|
|
|
|
|
// all basic blocks.
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
bb.mark = UNVISITED
|
|
|
|
|
}
|
|
|
|
|
bb = cfg[0]
|
2015-02-23 16:07:24 -05:00
|
|
|
rpo := int32(len(cfg))
|
2015-02-13 14:40:36 -05:00
|
|
|
reversepostorder(bb, &rpo)
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Sort the basic blocks by their depth first number. The
|
2016-03-13 10:23:18 +09:00
|
|
|
// slice is now a depth-first spanning tree with the first
|
2015-02-13 14:40:36 -05:00
|
|
|
// node being the root.
|
|
|
|
|
sort.Sort(blockrpocmp(cfg))
|
|
|
|
|
|
|
|
|
|
// Unreachable control flow nodes are indicated by a -1 in the rpo
|
2016-03-01 23:21:55 +00:00
|
|
|
// field. If we see these nodes something must have gone wrong in an
|
2015-02-13 14:40:36 -05:00
|
|
|
// upstream compilation phase.
|
2015-03-02 21:25:33 -05:00
|
|
|
bb = cfg[0]
|
2015-02-13 14:40:36 -05:00
|
|
|
if bb.rpo == -1 {
|
|
|
|
|
fmt.Printf("newcfg: unreachable basic block for %v\n", bb.last)
|
|
|
|
|
printcfg(cfg)
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("newcfg: invalid control flow graph")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return cfg
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// Frees a control flow graph (a slice of *BasicBlocks) and all of its leaf
|
2015-02-13 14:40:36 -05:00
|
|
|
// data structures.
|
|
|
|
|
func freecfg(cfg []*BasicBlock) {
|
2015-03-02 21:25:33 -05:00
|
|
|
if len(cfg) > 0 {
|
2015-02-23 16:07:24 -05:00
|
|
|
bb0 := cfg[0]
|
|
|
|
|
for p := bb0.first; p != nil; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
p.Opt = nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true if the node names a variable that is otherwise uninteresting to
|
|
|
|
|
// the liveness computation.
|
|
|
|
|
func isfunny(n *Node) bool {
|
|
|
|
|
return n.Sym != nil && (n.Sym.Name == ".fp" || n.Sym.Name == ".args")
|
|
|
|
|
}
|
|
|
|
|
|
2017-01-14 23:43:26 -08:00
|
|
|
func (lv *Liveness) initcache() {
|
|
|
|
|
if lv.cache.initialized {
|
|
|
|
|
Fatalf("liveness cache initialized twice")
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
lv.cache.initialized = true
|
|
|
|
|
|
|
|
|
|
for i, node := range lv.vars {
|
|
|
|
|
switch node.Class {
|
|
|
|
|
case PPARAM:
|
|
|
|
|
// A return instruction with a p.to is a tail return, which brings
|
|
|
|
|
// the stack pointer back up (if it ever went down) and then jumps
|
|
|
|
|
// to a new function entirely. That form of instruction must read
|
|
|
|
|
// all the parameters for correctness, and similarly it must not
|
|
|
|
|
// read the out arguments - they won't be set until the new
|
|
|
|
|
// function runs.
|
|
|
|
|
lv.cache.tailuevar = append(lv.cache.tailuevar, int32(i))
|
|
|
|
|
|
|
|
|
|
if node.Addrtaken {
|
|
|
|
|
lv.cache.textavarinit = append(lv.cache.textavarinit, int32(i))
|
|
|
|
|
}
|
|
|
|
|
lv.cache.textvarkill = append(lv.cache.textvarkill, int32(i))
|
|
|
|
|
|
|
|
|
|
case PPARAMOUT:
|
|
|
|
|
// If the result had its address taken, it is being tracked
|
|
|
|
|
// by the avarinit code, which does not use uevar.
|
|
|
|
|
// If we added it to uevar too, we'd not see any kill
|
|
|
|
|
// and decide that the variable was live entry, which it is not.
|
|
|
|
|
// So only use uevar in the non-addrtaken case.
|
|
|
|
|
// The p.to.type == obj.TYPE_NONE limits the bvset to
|
|
|
|
|
// non-tail-call return instructions; see note below for details.
|
|
|
|
|
if !node.Addrtaken {
|
|
|
|
|
lv.cache.retuevar = append(lv.cache.retuevar, int32(i))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Computes the effects of an instruction on a set of
|
2016-03-13 10:23:18 +09:00
|
|
|
// variables. The vars argument is a slice of *Nodes.
|
2015-02-13 14:40:36 -05:00
|
|
|
//
|
|
|
|
|
// The output vectors give bits for variables:
|
|
|
|
|
// uevar - used by this instruction
|
|
|
|
|
// varkill - killed by this instruction
|
|
|
|
|
// for variables without address taken, means variable was set
|
|
|
|
|
// for variables with address taken, means variable was marked dead
|
|
|
|
|
// avarinit - initialized or referred to by this instruction,
|
|
|
|
|
// only for variables with address taken but not escaping to heap
|
|
|
|
|
//
|
|
|
|
|
// The avarinit output serves as a signal that the data has been
|
|
|
|
|
// initialized, because any use of a variable must come after its
|
|
|
|
|
// initialization.
|
2017-01-14 23:43:26 -08:00
|
|
|
func (lv *Liveness) progeffects(prog *obj.Prog) (uevar, varkill, avarinit []int32) {
|
|
|
|
|
if !lv.cache.initialized {
|
|
|
|
|
Fatalf("liveness progeffects cache not initialized")
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-21 18:53:31 -04:00
|
|
|
// A return instruction with a p.to is a tail return, which brings
|
|
|
|
|
// the stack pointer back up (if it ever went down) and then jumps
|
|
|
|
|
// to a new function entirely. That form of instruction must read
|
|
|
|
|
// all the parameters for correctness, and similarly it must not
|
|
|
|
|
// read the out arguments - they won't be set until the new
|
|
|
|
|
// function runs.
|
|
|
|
|
if (prog.As == obj.AJMP || prog.As == obj.ARET) && prog.To.Type == obj.TYPE_MEM && prog.To.Name == obj.NAME_EXTERN {
|
|
|
|
|
// This is a tail call. Ensure the arguments are still alive.
|
|
|
|
|
// See issue 16016.
|
2017-01-14 23:43:26 -08:00
|
|
|
return lv.cache.tailuevar, nil, nil
|
2016-09-21 18:53:31 -04:00
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
if prog.As == obj.ARET {
|
2017-01-14 23:43:26 -08:00
|
|
|
if prog.To.Type == obj.TYPE_NONE {
|
|
|
|
|
return lv.cache.retuevar, nil, nil
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-01-14 23:43:26 -08:00
|
|
|
return nil, nil, nil
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if prog.As == obj.ATEXT {
|
|
|
|
|
// A text instruction marks the entry point to a function and
|
|
|
|
|
// the definition point of all in arguments.
|
2017-01-14 23:43:26 -08:00
|
|
|
return nil, lv.cache.textvarkill, lv.cache.textavarinit
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar = lv.cache.uevar[:0]
|
|
|
|
|
varkill = lv.cache.varkill[:0]
|
|
|
|
|
avarinit = lv.cache.avarinit[:0]
|
|
|
|
|
|
2016-09-15 01:42:58 -07:00
|
|
|
info := Thearch.Proginfo(prog)
|
|
|
|
|
|
|
|
|
|
if info.Flags&(LeftRead|LeftWrite|LeftAddr) != 0 {
|
2015-02-23 16:07:24 -05:00
|
|
|
from := &prog.From
|
2016-05-25 10:01:58 -04:00
|
|
|
if from.Node != nil && from.Sym != nil {
|
|
|
|
|
n := from.Node.(*Node)
|
2017-01-14 23:43:26 -08:00
|
|
|
if pos := liveIndex(n, lv.vars); pos >= 0 {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if n.Addrtaken {
|
2017-01-14 23:43:26 -08:00
|
|
|
avarinit = append(avarinit, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-09-15 01:42:58 -07:00
|
|
|
if info.Flags&(LeftRead|LeftAddr) != 0 {
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar = append(uevar, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-01-14 23:43:26 -08:00
|
|
|
if info.Flags&LeftWrite != 0 && !isfat(n.Type) {
|
|
|
|
|
varkill = append(varkill, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-16 15:02:47 -07:00
|
|
|
if info.Flags&From3Read != 0 {
|
|
|
|
|
from := prog.From3
|
|
|
|
|
if from.Node != nil && from.Sym != nil {
|
|
|
|
|
n := from.Node.(*Node)
|
2017-01-14 23:43:26 -08:00
|
|
|
if pos := liveIndex(n, lv.vars); pos >= 0 {
|
2016-09-16 15:02:47 -07:00
|
|
|
if n.Addrtaken {
|
2017-01-14 23:43:26 -08:00
|
|
|
avarinit = append(avarinit, pos)
|
2016-09-16 15:02:47 -07:00
|
|
|
} else {
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar = append(uevar, pos)
|
2016-09-16 15:02:47 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-15 01:42:58 -07:00
|
|
|
if info.Flags&(RightRead|RightWrite|RightAddr) != 0 {
|
2015-02-23 16:07:24 -05:00
|
|
|
to := &prog.To
|
2016-05-25 10:01:58 -04:00
|
|
|
if to.Node != nil && to.Sym != nil {
|
|
|
|
|
n := to.Node.(*Node)
|
2017-01-14 23:43:26 -08:00
|
|
|
if pos := liveIndex(n, lv.vars); pos >= 0 {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if n.Addrtaken {
|
2015-02-13 14:40:36 -05:00
|
|
|
if prog.As != obj.AVARKILL {
|
2017-01-14 23:43:26 -08:00
|
|
|
avarinit = append(avarinit, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if prog.As == obj.AVARDEF || prog.As == obj.AVARKILL {
|
2017-01-14 23:43:26 -08:00
|
|
|
varkill = append(varkill, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// RightRead is a read, obviously.
|
|
|
|
|
// RightAddr by itself is also implicitly a read.
|
|
|
|
|
//
|
|
|
|
|
// RightAddr|RightWrite means that the address is being taken
|
|
|
|
|
// but only so that the instruction can write to the value.
|
|
|
|
|
// It is not a read. It is equivalent to RightWrite except that
|
|
|
|
|
// having the RightAddr bit set keeps the registerizer from
|
|
|
|
|
// trying to substitute a register for the memory location.
|
2016-09-15 01:42:58 -07:00
|
|
|
if (info.Flags&RightRead != 0) || info.Flags&(RightAddr|RightWrite) == RightAddr {
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar = append(uevar, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-09-15 01:42:58 -07:00
|
|
|
if info.Flags&RightWrite != 0 {
|
2016-09-15 14:34:20 +10:00
|
|
|
if !isfat(n.Type) || prog.As == obj.AVARDEF {
|
2017-01-14 23:43:26 -08:00
|
|
|
varkill = append(varkill, pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-01-14 23:43:26 -08:00
|
|
|
|
|
|
|
|
return uevar, varkill, avarinit
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
// liveIndex returns the index of n in the set of tracked vars.
|
|
|
|
|
// If n is not a tracked var, liveIndex returns -1.
|
|
|
|
|
// If n is not a tracked var but should be tracked, liveIndex crashes.
|
|
|
|
|
func liveIndex(n *Node, vars []*Node) int32 {
|
|
|
|
|
if n.Name.Curfn != Curfn || !livenessShouldTrack(n) {
|
|
|
|
|
return -1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pos, ok := n.Opt().(int32) // index in vars
|
|
|
|
|
if !ok {
|
|
|
|
|
Fatalf("lost track of variable in liveness: %v (%p, %p)", n, n, n.Orig)
|
|
|
|
|
}
|
|
|
|
|
if pos >= int32(len(vars)) || vars[pos] != n {
|
|
|
|
|
Fatalf("bad bookkeeping in liveness: %v (%p, %p)", n, n, n.Orig)
|
|
|
|
|
}
|
|
|
|
|
return pos
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Constructs a new liveness structure used to hold the global state of the
|
2016-03-13 10:23:18 +09:00
|
|
|
// liveness computation. The cfg argument is a slice of *BasicBlocks and the
|
|
|
|
|
// vars argument is a slice of *Nodes.
|
2015-02-13 14:40:36 -05:00
|
|
|
func newliveness(fn *Node, ptxt *obj.Prog, cfg []*BasicBlock, vars []*Node) *Liveness {
|
2016-03-13 10:23:18 +09:00
|
|
|
result := Liveness{
|
|
|
|
|
fn: fn,
|
|
|
|
|
ptxt: ptxt,
|
|
|
|
|
cfg: cfg,
|
|
|
|
|
vars: vars,
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
nblocks := int32(len(cfg))
|
|
|
|
|
nvars := int32(len(vars))
|
2015-03-02 21:25:33 -05:00
|
|
|
bulk := bvbulkalloc(nvars, nblocks*7)
|
|
|
|
|
for _, bb := range cfg {
|
|
|
|
|
bb.uevar = bulk.next()
|
|
|
|
|
bb.varkill = bulk.next()
|
|
|
|
|
bb.livein = bulk.next()
|
|
|
|
|
bb.liveout = bulk.next()
|
|
|
|
|
bb.avarinit = bulk.next()
|
|
|
|
|
bb.avarinitany = bulk.next()
|
|
|
|
|
bb.avarinitall = bulk.next()
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
return &result
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-01-14 23:43:26 -08:00
|
|
|
func (lv *Liveness) printeffects(p *obj.Prog, uevar, varkill, avarinit []int32) {
|
2016-10-04 15:57:24 -07:00
|
|
|
fmt.Printf("effects of %v\n", p)
|
2017-01-14 23:43:26 -08:00
|
|
|
fmt.Println("uevar:", lv.slice2bvec(uevar))
|
|
|
|
|
fmt.Println("varkill:", lv.slice2bvec(varkill))
|
|
|
|
|
fmt.Println("avarinit:", lv.slice2bvec(avarinit))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Pretty print a variable node. Uses Pascal like conventions for pointers and
|
2015-02-13 14:40:36 -05:00
|
|
|
// addresses to avoid confusing the C like conventions used in the node variable
|
|
|
|
|
// names.
|
|
|
|
|
func printnode(node *Node) {
|
2015-02-23 16:07:24 -05:00
|
|
|
p := ""
|
2015-02-13 14:40:36 -05:00
|
|
|
if haspointers(node.Type) {
|
|
|
|
|
p = "^"
|
|
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
a := ""
|
2015-03-05 18:20:54 +11:00
|
|
|
if node.Addrtaken {
|
2015-02-13 14:40:36 -05:00
|
|
|
a = "@"
|
|
|
|
|
}
|
2015-04-17 12:03:22 -04:00
|
|
|
fmt.Printf(" %v%s%s", node, p, a)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// Pretty print a list of variables. The vars argument is a slice of *Nodes.
|
2016-04-29 14:17:04 +10:00
|
|
|
func printvars(name string, bv bvec, vars []*Node) {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("%s:", name)
|
2015-03-02 21:25:33 -05:00
|
|
|
for i, node := range vars {
|
2016-10-04 15:57:24 -07:00
|
|
|
if bv.Get(int32(i)) {
|
2015-03-02 21:25:33 -05:00
|
|
|
printnode(node)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
2017-01-14 23:43:26 -08:00
|
|
|
func (lv *Liveness) slice2bvec(vars []int32) bvec {
|
|
|
|
|
bv := bvalloc(int32(len(lv.vars)))
|
|
|
|
|
for _, id := range vars {
|
|
|
|
|
bv.Set(id)
|
|
|
|
|
}
|
|
|
|
|
return bv
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Prints a basic block annotated with the information computed by liveness
|
|
|
|
|
// analysis.
|
|
|
|
|
func livenessprintblock(lv *Liveness, bb *BasicBlock) {
|
|
|
|
|
fmt.Printf("basic block %d\n", bb.rpo)
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\tpred:")
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, pred := range bb.pred {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf(" %d", pred.rpo)
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\tsucc:")
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, succ := range bb.succ {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf(" %d", succ.rpo)
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
|
2016-03-03 15:49:04 -08:00
|
|
|
printvars("\tuevar", bb.uevar, lv.vars)
|
|
|
|
|
printvars("\tvarkill", bb.varkill, lv.vars)
|
|
|
|
|
printvars("\tlivein", bb.livein, lv.vars)
|
|
|
|
|
printvars("\tliveout", bb.liveout, lv.vars)
|
|
|
|
|
printvars("\tavarinit", bb.avarinit, lv.vars)
|
|
|
|
|
printvars("\tavarinitany", bb.avarinitany, lv.vars)
|
|
|
|
|
printvars("\tavarinitall", bb.avarinitall, lv.vars)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
fmt.Printf("\tprog:\n")
|
2015-02-23 16:07:24 -05:00
|
|
|
for prog := bb.first; ; prog = prog.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\t\t%v", prog)
|
|
|
|
|
if prog.As == obj.APCDATA && prog.From.Offset == obj.PCDATA_StackMapIndex {
|
2015-03-02 21:25:33 -05:00
|
|
|
pos := int32(prog.To.Offset)
|
|
|
|
|
live := lv.livepointers[pos]
|
2016-10-04 15:57:24 -07:00
|
|
|
fmt.Printf(" %s", live.String())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
if prog == bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Prints a control flow graph annotated with any information computed by
|
|
|
|
|
// liveness analysis.
|
|
|
|
|
func livenessprintcfg(lv *Liveness) {
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range lv.cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
livenessprintblock(lv, bb)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func checkauto(fn *Node, p *obj.Prog, n *Node) {
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
|
|
|
|
if ln.Op == ONAME && ln.Class == PAUTO && ln == n {
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n == nil {
|
2015-04-17 12:03:22 -04:00
|
|
|
fmt.Printf("%v: checkauto %v: nil node in %v\n", p.Line(), Curfn, p)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
fmt.Printf("checkauto %v: %v (%p; class=%d) not found in %p %v\n", funcSym(Curfn), n, n, n.Class, p, p)
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
|
|
|
|
fmt.Printf("\t%v (%p; class=%d)\n", ln, ln, ln.Class)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("checkauto: invariant lost")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func checkparam(fn *Node, p *obj.Prog, n *Node) {
|
|
|
|
|
if isfunny(n) {
|
|
|
|
|
return
|
|
|
|
|
}
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, a := range fn.Func.Dcl {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if a.Op == ONAME && (a.Class == PPARAM || a.Class == PPARAMOUT) && a == n {
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-17 12:03:22 -04:00
|
|
|
fmt.Printf("checkparam %v: %v (%p; class=%d) not found in %v\n", Curfn, n, n, n.Class, p)
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
|
|
|
|
fmt.Printf("\t%v (%p; class=%d)\n", ln, ln, ln.Class)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("checkparam: invariant lost")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func checkprog(fn *Node, p *obj.Prog) {
|
|
|
|
|
if p.From.Name == obj.NAME_AUTO {
|
|
|
|
|
checkauto(fn, p, p.From.Node.(*Node))
|
|
|
|
|
}
|
|
|
|
|
if p.From.Name == obj.NAME_PARAM {
|
|
|
|
|
checkparam(fn, p, p.From.Node.(*Node))
|
|
|
|
|
}
|
|
|
|
|
if p.To.Name == obj.NAME_AUTO {
|
|
|
|
|
checkauto(fn, p, p.To.Node.(*Node))
|
|
|
|
|
}
|
|
|
|
|
if p.To.Name == obj.NAME_PARAM {
|
|
|
|
|
checkparam(fn, p, p.To.Node.(*Node))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Check instruction invariants. We assume that the nodes corresponding to the
|
2015-02-13 14:40:36 -05:00
|
|
|
// sources and destinations of memory operations will be declared in the
|
2016-03-01 23:21:55 +00:00
|
|
|
// function. This is not strictly true, as is the case for the so-called funny
|
|
|
|
|
// nodes and there are special cases to skip over that stuff. The analysis will
|
2015-02-13 14:40:36 -05:00
|
|
|
// fail if this invariant blindly changes.
|
|
|
|
|
func checkptxt(fn *Node, firstp *obj.Prog) {
|
|
|
|
|
if debuglive == 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
for p := firstp; p != nil; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
if false {
|
|
|
|
|
fmt.Printf("analyzing '%v'\n", p)
|
|
|
|
|
}
|
2017-02-06 17:06:02 -08:00
|
|
|
checkprog(fn, p)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NOTE: The bitmap for a specific type t should be cached in t after the first run
|
|
|
|
|
// and then simply copied into bv at the correct offset on future calls with
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, onebitwalktype1
|
2015-02-13 14:40:36 -05:00
|
|
|
// accounts for 40% of the 6g execution time.
|
2016-04-29 14:17:04 +10:00
|
|
|
func onebitwalktype1(t *Type, xoffset *int64, bv bvec) {
|
2015-02-13 14:40:36 -05:00
|
|
|
if t.Align > 0 && *xoffset&int64(t.Align-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid initial alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch t.Etype {
|
|
|
|
|
case TINT8,
|
|
|
|
|
TUINT8,
|
|
|
|
|
TINT16,
|
|
|
|
|
TUINT16,
|
|
|
|
|
TINT32,
|
|
|
|
|
TUINT32,
|
|
|
|
|
TINT64,
|
|
|
|
|
TUINT64,
|
|
|
|
|
TINT,
|
|
|
|
|
TUINT,
|
|
|
|
|
TUINTPTR,
|
|
|
|
|
TBOOL,
|
|
|
|
|
TFLOAT32,
|
|
|
|
|
TFLOAT64,
|
|
|
|
|
TCOMPLEX64,
|
|
|
|
|
TCOMPLEX128:
|
|
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
|
|
|
|
case TPTR32,
|
|
|
|
|
TPTR64,
|
|
|
|
|
TUNSAFEPTR,
|
|
|
|
|
TFUNC,
|
|
|
|
|
TCHAN,
|
|
|
|
|
TMAP:
|
|
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
|
|
|
|
case TSTRING:
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
// struct { byte *str; intgo len; }
|
2015-02-13 14:40:36 -05:00
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) //pointer in first slot
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
|
|
|
|
case TINTER:
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
// struct { Itab *tab; void *data; }
|
|
|
|
|
// or, when isnilinter(t)==true:
|
|
|
|
|
// struct { Type *type; void *data; }
|
2015-02-13 14:40:36 -05:00
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer in first slot
|
|
|
|
|
bv.Set(int32(*xoffset/int64(Widthptr) + 1)) // pointer in second slot
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
2016-04-18 14:02:08 -07:00
|
|
|
case TSLICE:
|
|
|
|
|
// struct { byte *array; uintgo len; uintgo cap; }
|
|
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
|
|
|
|
Fatalf("onebitwalktype1: invalid TARRAY alignment, %v", t)
|
|
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer in first slot (BitsPointer)
|
2016-04-18 14:02:08 -07:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
case TARRAY:
|
2016-04-18 14:02:08 -07:00
|
|
|
for i := int64(0); i < t.NumElem(); i++ {
|
|
|
|
|
onebitwalktype1(t.Elem(), xoffset, bv)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case TSTRUCT:
|
2016-03-13 10:23:18 +09:00
|
|
|
var o int64
|
2016-03-17 01:32:18 -07:00
|
|
|
for _, t1 := range t.Fields().Slice() {
|
2016-03-28 09:40:53 -07:00
|
|
|
fieldoffset := t1.Offset
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += fieldoffset - o
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwalktype1(t1.Type, xoffset, bv)
|
2015-02-13 14:40:36 -05:00
|
|
|
o = fieldoffset + t1.Type.Width
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*xoffset += t.Width - o
|
|
|
|
|
|
|
|
|
|
default:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: unexpected type, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the number of words of local variables.
|
|
|
|
|
func localswords() int32 {
|
|
|
|
|
return int32(stkptrsize / int64(Widthptr))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the number of words of in and out arguments.
|
|
|
|
|
func argswords() int32 {
|
2016-03-28 14:31:57 -07:00
|
|
|
return int32(Curfn.Type.ArgWidth() / int64(Widthptr))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Generates live pointer value maps for arguments and local variables. The
|
|
|
|
|
// this argument and the in arguments are always assumed live. The vars
|
2016-03-13 10:23:18 +09:00
|
|
|
// argument is a slice of *Nodes.
|
2016-04-29 14:17:04 +10:00
|
|
|
func onebitlivepointermap(lv *Liveness, liveout bvec, vars []*Node, args bvec, locals bvec) {
|
2015-02-13 14:40:36 -05:00
|
|
|
var xoffset int64
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
for i := int32(0); ; i++ {
|
2016-10-04 15:57:24 -07:00
|
|
|
i = liveout.Next(i)
|
2015-02-17 22:13:49 -05:00
|
|
|
if i < 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
node := vars[i]
|
2015-02-13 14:40:36 -05:00
|
|
|
switch node.Class {
|
|
|
|
|
case PAUTO:
|
|
|
|
|
xoffset = node.Xoffset + stkptrsize
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwalktype1(node.Type, &xoffset, locals)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case PPARAM, PPARAMOUT:
|
2015-02-13 14:40:36 -05:00
|
|
|
xoffset = node.Xoffset
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwalktype1(node.Type, &xoffset, args)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Construct a disembodied instruction.
|
2016-03-07 18:00:08 -08:00
|
|
|
func unlinkedprog(as obj.As) *obj.Prog {
|
2015-02-23 16:07:24 -05:00
|
|
|
p := Ctxt.NewProg()
|
2015-02-13 14:40:36 -05:00
|
|
|
Clearp(p)
|
2016-03-07 18:00:08 -08:00
|
|
|
p.As = as
|
2015-02-13 14:40:36 -05:00
|
|
|
return p
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Construct a new PCDATA instruction associated with and for the purposes of
|
|
|
|
|
// covering an existing instruction.
|
|
|
|
|
func newpcdataprog(prog *obj.Prog, index int32) *obj.Prog {
|
2015-02-23 16:07:24 -05:00
|
|
|
pcdata := unlinkedprog(obj.APCDATA)
|
2016-12-09 14:30:40 -05:00
|
|
|
pcdata.Pos = prog.Pos
|
2016-03-25 17:19:59 -07:00
|
|
|
pcdata.From.Type = obj.TYPE_CONST
|
|
|
|
|
pcdata.From.Offset = obj.PCDATA_StackMapIndex
|
|
|
|
|
pcdata.To.Type = obj.TYPE_CONST
|
|
|
|
|
pcdata.To.Offset = int64(index)
|
2015-02-13 14:40:36 -05:00
|
|
|
return pcdata
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true for instructions that are safe points that must be annotated
|
|
|
|
|
// with liveness information.
|
2015-02-17 22:13:49 -05:00
|
|
|
func issafepoint(prog *obj.Prog) bool {
|
|
|
|
|
return prog.As == obj.ATEXT || prog.As == obj.ACALL
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Initializes the sets for solving the live variables. Visits all the
|
2015-02-13 14:40:36 -05:00
|
|
|
// instructions in each basic block to summarizes the information at each basic
|
|
|
|
|
// block
|
|
|
|
|
func livenessprologue(lv *Liveness) {
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.initcache()
|
|
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range lv.cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Walk the block instructions backward and update the block
|
|
|
|
|
// effects with the each prog effects.
|
2015-03-02 21:25:33 -05:00
|
|
|
for p := bb.last; p != nil; p = p.Opt.(*obj.Prog) {
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar, varkill, _ := lv.progeffects(p)
|
2015-02-13 14:40:36 -05:00
|
|
|
if debuglive >= 3 {
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.printeffects(p, uevar, varkill, nil)
|
|
|
|
|
}
|
|
|
|
|
for _, pos := range varkill {
|
|
|
|
|
bb.varkill.Set(pos)
|
|
|
|
|
bb.uevar.Unset(pos)
|
|
|
|
|
}
|
|
|
|
|
for _, pos := range uevar {
|
|
|
|
|
bb.uevar.Set(pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Walk the block instructions forward to update avarinit bits.
|
|
|
|
|
// avarinit describes the effect at the end of the block, not the beginning.
|
2015-03-02 21:25:33 -05:00
|
|
|
for p := bb.first; ; p = p.Link {
|
2017-01-14 23:43:26 -08:00
|
|
|
_, varkill, avarinit := lv.progeffects(p)
|
2015-02-13 14:40:36 -05:00
|
|
|
if debuglive >= 3 {
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.printeffects(p, nil, varkill, avarinit)
|
|
|
|
|
}
|
|
|
|
|
for _, pos := range varkill {
|
|
|
|
|
bb.avarinit.Unset(pos)
|
|
|
|
|
}
|
|
|
|
|
for _, pos := range avarinit {
|
|
|
|
|
bb.avarinit.Set(pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if p == bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Solve the liveness dataflow equations.
|
|
|
|
|
func livenesssolve(lv *Liveness) {
|
|
|
|
|
// These temporary bitvectors exist to avoid successive allocations and
|
|
|
|
|
// frees within the loop.
|
2015-02-23 16:07:24 -05:00
|
|
|
newlivein := bvalloc(int32(len(lv.vars)))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
newliveout := bvalloc(int32(len(lv.vars)))
|
|
|
|
|
any := bvalloc(int32(len(lv.vars)))
|
|
|
|
|
all := bvalloc(int32(len(lv.vars)))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Push avarinitall, avarinitany forward.
|
|
|
|
|
// avarinitall says the addressed var is initialized along all paths reaching the block exit.
|
|
|
|
|
// avarinitany says the addressed var is initialized along some path reaching the block exit.
|
2015-03-02 21:25:33 -05:00
|
|
|
for i, bb := range lv.cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
if i == 0 {
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.avarinitall.Copy(bb.avarinit)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.avarinitall.Clear()
|
|
|
|
|
bb.avarinitall.Not()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.avarinitany.Copy(bb.avarinit)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-15 17:03:10 +11:00
|
|
|
for change := true; change; {
|
|
|
|
|
change = false
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range lv.cfg {
|
2016-10-04 15:57:24 -07:00
|
|
|
any.Clear()
|
|
|
|
|
all.Clear()
|
2015-03-02 21:25:33 -05:00
|
|
|
for j, pred := range bb.pred {
|
2015-02-13 14:40:36 -05:00
|
|
|
if j == 0 {
|
2016-10-04 15:57:24 -07:00
|
|
|
any.Copy(pred.avarinitany)
|
|
|
|
|
all.Copy(pred.avarinitall)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-10-04 15:57:24 -07:00
|
|
|
any.Or(any, pred.avarinitany)
|
|
|
|
|
all.And(all, pred.avarinitall)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
any.AndNot(any, bb.varkill)
|
|
|
|
|
all.AndNot(all, bb.varkill)
|
|
|
|
|
any.Or(any, bb.avarinit)
|
|
|
|
|
all.Or(all, bb.avarinit)
|
|
|
|
|
if !any.Eq(bb.avarinitany) {
|
2016-03-15 17:03:10 +11:00
|
|
|
change = true
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.avarinitany.Copy(any)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
if !all.Eq(bb.avarinitall) {
|
2016-03-15 17:03:10 +11:00
|
|
|
change = true
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.avarinitall.Copy(all)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Iterate through the blocks in reverse round-robin fashion. A work
|
|
|
|
|
// queue might be slightly faster. As is, the number of iterations is
|
2015-02-13 14:40:36 -05:00
|
|
|
// so low that it hardly seems to be worth the complexity.
|
|
|
|
|
|
2016-03-15 17:03:10 +11:00
|
|
|
for change := true; change; {
|
|
|
|
|
change = false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Walk blocks in the general direction of propagation. This
|
2015-02-13 14:40:36 -05:00
|
|
|
// improves convergence.
|
2015-03-02 21:25:33 -05:00
|
|
|
for i := len(lv.cfg) - 1; i >= 0; i-- {
|
|
|
|
|
bb := lv.cfg[i]
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// A variable is live on output from this block
|
|
|
|
|
// if it is live on input to some successor.
|
|
|
|
|
//
|
|
|
|
|
// out[b] = \bigcup_{s \in succ[b]} in[s]
|
2016-10-04 15:57:24 -07:00
|
|
|
newliveout.Clear()
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, succ := range bb.succ {
|
2016-10-04 15:57:24 -07:00
|
|
|
newliveout.Or(newliveout, succ.livein)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
if !bb.liveout.Eq(newliveout) {
|
2016-03-15 17:03:10 +11:00
|
|
|
change = true
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.liveout.Copy(newliveout)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A variable is live on input to this block
|
|
|
|
|
// if it is live on output from this block and
|
|
|
|
|
// not set by the code in this block.
|
|
|
|
|
//
|
|
|
|
|
// in[b] = uevar[b] \cup (out[b] \setminus varkill[b])
|
2016-10-04 15:57:24 -07:00
|
|
|
newlivein.AndNot(bb.liveout, bb.varkill)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
bb.livein.Or(newlivein, bb.uevar)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// This function is slow but it is only used for generating debug prints.
|
|
|
|
|
// Check whether n is marked live in args/locals.
|
2016-04-29 14:17:04 +10:00
|
|
|
func islive(n *Node, args bvec, locals bvec) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Class {
|
2015-04-01 09:38:44 -07:00
|
|
|
case PPARAM, PPARAMOUT:
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
for i := 0; int64(i) < n.Type.Width/int64(Widthptr); i++ {
|
2016-10-04 15:57:24 -07:00
|
|
|
if args.Get(int32(n.Xoffset/int64(Widthptr) + int64(i))) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case PAUTO:
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
for i := 0; int64(i) < n.Type.Width/int64(Widthptr); i++ {
|
2016-10-04 15:57:24 -07:00
|
|
|
if locals.Get(int32((n.Xoffset+stkptrsize)/int64(Widthptr) + int64(i))) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Visits all instructions in a basic block and computes a bit vector of live
|
|
|
|
|
// variables at each safe point location.
|
|
|
|
|
func livenessepilogue(lv *Liveness) {
|
2015-02-23 16:07:24 -05:00
|
|
|
nvars := int32(len(lv.vars))
|
|
|
|
|
livein := bvalloc(nvars)
|
|
|
|
|
liveout := bvalloc(nvars)
|
|
|
|
|
any := bvalloc(nvars)
|
|
|
|
|
all := bvalloc(nvars)
|
2017-01-30 14:55:12 -08:00
|
|
|
outLive := bvalloc(argswords()) // always-live output params
|
|
|
|
|
outLiveHeap := bvalloc(localswords()) // always-live pointers to heap-allocated copies of output params
|
|
|
|
|
|
|
|
|
|
// If there is a defer (that could recover), then all output
|
|
|
|
|
// parameters are live all the time. In addition, any locals
|
|
|
|
|
// that are pointers to heap-allocated output parameters are
|
|
|
|
|
// also always live (post-deferreturn code needs these
|
|
|
|
|
// pointers to copy values back to the stack).
|
|
|
|
|
// TODO: if the output parameter is heap-allocated, then we
|
|
|
|
|
// don't need to keep the stack copy live?
|
2016-06-18 19:40:57 -07:00
|
|
|
if hasdefer {
|
|
|
|
|
for _, n := range lv.vars {
|
2017-01-30 14:55:12 -08:00
|
|
|
if n.Class == PPARAMOUT {
|
|
|
|
|
if n.IsOutputParamHeapAddr() {
|
|
|
|
|
// Just to be paranoid.
|
|
|
|
|
Fatalf("variable %v both output param and heap output param", n)
|
|
|
|
|
}
|
|
|
|
|
// Needzero not necessary, as the compiler
|
|
|
|
|
// explicitly zeroes output vars at start of fn.
|
|
|
|
|
xoffset := n.Xoffset
|
|
|
|
|
onebitwalktype1(n.Type, &xoffset, outLive)
|
|
|
|
|
}
|
2016-06-18 19:40:57 -07:00
|
|
|
if n.IsOutputParamHeapAddr() {
|
2016-07-01 15:44:12 -07:00
|
|
|
n.Name.Needzero = true
|
2016-06-18 19:40:57 -07:00
|
|
|
xoffset := n.Xoffset + stkptrsize
|
2017-01-30 14:55:12 -08:00
|
|
|
onebitwalktype1(n.Type, &xoffset, outLiveHeap)
|
2016-06-18 19:40:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range lv.cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Compute avarinitany and avarinitall for entry to block.
|
|
|
|
|
// This duplicates information known during livenesssolve
|
|
|
|
|
// but avoids storing two more vectors for each block.
|
2016-10-04 15:57:24 -07:00
|
|
|
any.Clear()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
all.Clear()
|
2016-03-13 10:23:18 +09:00
|
|
|
for j := 0; j < len(bb.pred); j++ {
|
|
|
|
|
pred := bb.pred[j]
|
2015-02-13 14:40:36 -05:00
|
|
|
if j == 0 {
|
2016-10-04 15:57:24 -07:00
|
|
|
any.Copy(pred.avarinitany)
|
|
|
|
|
all.Copy(pred.avarinitall)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-10-04 15:57:24 -07:00
|
|
|
any.Or(any, pred.avarinitany)
|
|
|
|
|
all.And(all, pred.avarinitall)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Walk forward through the basic block instructions and
|
|
|
|
|
// allocate liveness maps for those instructions that need them.
|
|
|
|
|
// Seed the maps with information about the addrtaken variables.
|
2016-03-13 10:23:18 +09:00
|
|
|
for p := bb.first; ; p = p.Link {
|
2017-01-14 23:43:26 -08:00
|
|
|
_, varkill, avarinit := lv.progeffects(p)
|
|
|
|
|
for _, pos := range varkill {
|
|
|
|
|
any.Unset(pos)
|
|
|
|
|
all.Unset(pos)
|
|
|
|
|
}
|
|
|
|
|
for _, pos := range avarinit {
|
|
|
|
|
any.Set(pos)
|
|
|
|
|
all.Set(pos)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if issafepoint(p) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Annotate ambiguously live variables so that they can
|
|
|
|
|
// be zeroed at function entry.
|
|
|
|
|
// livein and liveout are dead here and used as temporaries.
|
2016-10-04 15:57:24 -07:00
|
|
|
livein.Clear()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
liveout.AndNot(any, all)
|
|
|
|
|
if !liveout.IsEmpty() {
|
2016-03-13 10:23:18 +09:00
|
|
|
for pos := int32(0); pos < liveout.n; pos++ {
|
2016-10-04 15:57:24 -07:00
|
|
|
if !liveout.Get(pos) {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
all.Set(pos) // silence future warnings in this block
|
2016-03-13 10:23:18 +09:00
|
|
|
n := lv.vars[pos]
|
2015-05-15 10:02:19 -07:00
|
|
|
if !n.Name.Needzero {
|
|
|
|
|
n.Name.Needzero = true
|
2015-02-13 14:40:36 -05:00
|
|
|
if debuglive >= 1 {
|
2016-12-09 14:30:40 -05:00
|
|
|
Warnl(p.Pos, "%v: %L is ambiguously live", Curfn.Func.Nname, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Allocate a bit vector for each class and facet of
|
|
|
|
|
// value we are tracking.
|
|
|
|
|
|
|
|
|
|
// Live stuff first.
|
2016-03-13 10:23:18 +09:00
|
|
|
args := bvalloc(argswords())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
lv.argslivepointers = append(lv.argslivepointers, args)
|
2016-03-13 10:23:18 +09:00
|
|
|
locals := bvalloc(localswords())
|
2015-02-13 14:40:36 -05:00
|
|
|
lv.livepointers = append(lv.livepointers, locals)
|
|
|
|
|
|
|
|
|
|
if debuglive >= 3 {
|
|
|
|
|
fmt.Printf("%v\n", p)
|
|
|
|
|
printvars("avarinitany", any, lv.vars)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Record any values with an "address taken" reaching
|
|
|
|
|
// this code position as live. Must do now instead of below
|
|
|
|
|
// because the any/all calculation requires walking forward
|
|
|
|
|
// over the block (as this loop does), while the liveout
|
|
|
|
|
// requires walking backward (as the next loop does).
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitlivepointermap(lv, any, lv.vars, args, locals)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if p == bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bb.lastbitmapindex = len(lv.livepointers) - 1
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-02 20:34:22 -05:00
|
|
|
var msg []string
|
2016-03-13 10:23:18 +09:00
|
|
|
var nmsg, startmsg int
|
2015-03-02 21:25:33 -05:00
|
|
|
for _, bb := range lv.cfg {
|
2015-05-27 10:42:55 -04:00
|
|
|
if debuglive >= 1 && Curfn.Func.Nname.Sym.Name != "init" && Curfn.Func.Nname.Sym.Name[0] != '.' {
|
2016-03-13 10:23:18 +09:00
|
|
|
nmsg = len(lv.livepointers)
|
2015-02-13 14:40:36 -05:00
|
|
|
startmsg = nmsg
|
|
|
|
|
msg = make([]string, nmsg)
|
2016-03-13 10:23:18 +09:00
|
|
|
for j := 0; j < nmsg; j++ {
|
2015-02-13 14:40:36 -05:00
|
|
|
msg[j] = ""
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// walk backward, emit pcdata and populate the maps
|
2016-03-13 10:23:18 +09:00
|
|
|
pos := int32(bb.lastbitmapindex)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
if pos < 0 {
|
|
|
|
|
// the first block we encounter should have the ATEXT so
|
|
|
|
|
// at no point should pos ever be less than zero.
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("livenessepilogue")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
livein.Copy(bb.liveout)
|
2016-03-13 10:23:18 +09:00
|
|
|
var next *obj.Prog
|
|
|
|
|
for p := bb.last; p != nil; p = next {
|
|
|
|
|
next = p.Opt.(*obj.Prog) // splicebefore modifies p.opt
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Propagate liveness information
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar, varkill, _ := lv.progeffects(p)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
liveout.Copy(livein)
|
2017-01-14 23:43:26 -08:00
|
|
|
for _, pos := range varkill {
|
|
|
|
|
livein.Unset(pos)
|
|
|
|
|
}
|
|
|
|
|
for _, pos := range uevar {
|
|
|
|
|
livein.Set(pos)
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
if debuglive >= 3 && issafepoint(p) {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("%v\n", p)
|
2017-01-14 23:43:26 -08:00
|
|
|
printvars("uevar", lv.slice2bvec(uevar), lv.vars)
|
|
|
|
|
printvars("varkill", lv.slice2bvec(varkill), lv.vars)
|
2015-02-13 14:40:36 -05:00
|
|
|
printvars("livein", livein, lv.vars)
|
|
|
|
|
printvars("liveout", liveout, lv.vars)
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if issafepoint(p) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Found an interesting instruction, record the
|
|
|
|
|
// corresponding liveness information.
|
|
|
|
|
|
|
|
|
|
// Useful sanity check: on entry to the function,
|
|
|
|
|
// the only things that can possibly be live are the
|
|
|
|
|
// input parameters.
|
|
|
|
|
if p.As == obj.ATEXT {
|
2016-03-13 10:23:18 +09:00
|
|
|
for j := int32(0); j < liveout.n; j++ {
|
2016-10-04 15:57:24 -07:00
|
|
|
if !liveout.Get(j) {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
n := lv.vars[j]
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Class != PPARAM {
|
2016-12-09 14:30:40 -05:00
|
|
|
yyerrorl(p.Pos, "internal error: %v %L recorded as live on entry, p.Pc=%v", Curfn.Func.Nname, n, p.Pc)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Record live pointers.
|
2016-03-13 10:23:18 +09:00
|
|
|
args := lv.argslivepointers[pos]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
locals := lv.livepointers[pos]
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitlivepointermap(lv, liveout, lv.vars, args, locals)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-07-14 13:23:11 -04:00
|
|
|
// Mark pparamout variables (as described above)
|
2015-02-13 14:40:36 -05:00
|
|
|
if p.As == obj.ACALL {
|
2017-01-30 14:55:12 -08:00
|
|
|
args.Or(args, outLive)
|
|
|
|
|
locals.Or(locals, outLiveHeap)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Show live pointer bitmaps.
|
|
|
|
|
// We're interpreting the args and locals bitmap instead of liveout so that we
|
|
|
|
|
// include the bits added by the avarinit logic in the
|
|
|
|
|
// previous loop.
|
|
|
|
|
if msg != nil {
|
2016-03-13 10:23:18 +09:00
|
|
|
fmt_ := fmt.Sprintf("%v: live at ", p.Line())
|
2015-10-28 10:40:47 -07:00
|
|
|
if p.As == obj.ACALL && p.To.Sym != nil {
|
|
|
|
|
name := p.To.Sym.Name
|
|
|
|
|
i := strings.Index(name, ".")
|
|
|
|
|
if i >= 0 {
|
|
|
|
|
name = name[i+1:]
|
|
|
|
|
}
|
|
|
|
|
fmt_ += fmt.Sprintf("call to %s:", name)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else if p.As == obj.ACALL {
|
2015-02-28 20:31:32 +00:00
|
|
|
fmt_ += "indirect call:"
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
|
|
|
|
fmt_ += fmt.Sprintf("entry to %s:", ((p.From.Node).(*Node)).Sym.Name)
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
numlive := 0
|
|
|
|
|
for j := 0; j < len(lv.vars); j++ {
|
|
|
|
|
n := lv.vars[j]
|
2015-02-17 22:13:49 -05:00
|
|
|
if islive(n, args, locals) {
|
2015-04-17 12:03:22 -04:00
|
|
|
fmt_ += fmt.Sprintf(" %v", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
numlive++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-28 20:31:32 +00:00
|
|
|
fmt_ += "\n"
|
2015-02-13 14:40:36 -05:00
|
|
|
if numlive == 0 { // squelch message
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
startmsg--
|
|
|
|
|
msg[startmsg] = fmt_
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Only CALL instructions need a PCDATA annotation.
|
|
|
|
|
// The TEXT instruction annotation is implicit.
|
|
|
|
|
if p.As == obj.ACALL {
|
2015-02-17 22:13:49 -05:00
|
|
|
if isdeferreturn(p) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// runtime.deferreturn modifies its return address to return
|
|
|
|
|
// back to the CALL, not to the subsequent instruction.
|
|
|
|
|
// Because the return comes back one instruction early,
|
|
|
|
|
// the PCDATA must begin one instruction early too.
|
|
|
|
|
// The instruction before a call to deferreturn is always a
|
|
|
|
|
// no-op, to keep PC-specific data unambiguous.
|
cmd/compile, cmd/link, runtime: on ppc64x, maintain the TOC pointer in R2 when compiling PIC
The PowerPC ISA does not have a PC-relative load instruction, which poses
obvious challenges when generating position-independent code. The way the ELFv2
ABI addresses this is to specify that r2 points to a per "module" (shared
library or executable) TOC pointer. Maintaining this pointer requires
cooperation between codegen and the system linker:
* Non-leaf functions leave space on the stack at r1+24 to save the TOC pointer.
* A call to a function that *might* have to go via a PLT stub must be followed
by a nop instruction that the system linker can replace with "ld r2, 24(r1)"
to restore the TOC pointer (only when dynamically linking Go code).
* When calling a function via a function pointer, the address of the function
must be in r12, and the first couple of instructions (the "global entry
point") of the called function use this to derive the address of the TOC
for the module it is in.
* When calling a function that is implemented in the same module, the system
linker adjusts the call to skip over the instructions mentioned above (the
"local entry point"), assuming that r2 is already correctly set.
So this changeset adds the global entry point instructions, sets the metadata so
the system linker knows where the local entry point is, inserts code to save the
TOC pointer at 24(r1), adds a nop after any call not known to be local and copes
with the odd non-local code transfer in the runtime (e.g. the stuff around
jmpdefer). It does not actually compile PIC yet.
Change-Id: I7522e22bdfd2f891745a900c60254fe9e372c854
Reviewed-on: https://go-review.googlesource.com/15967
Reviewed-by: Russ Cox <rsc@golang.org>
2015-10-16 15:42:09 +13:00
|
|
|
prev := p.Opt.(*obj.Prog)
|
2016-04-06 12:01:40 -07:00
|
|
|
if Ctxt.Arch.Family == sys.PPC64 {
|
cmd/compile, cmd/link, runtime: on ppc64x, maintain the TOC pointer in R2 when compiling PIC
The PowerPC ISA does not have a PC-relative load instruction, which poses
obvious challenges when generating position-independent code. The way the ELFv2
ABI addresses this is to specify that r2 points to a per "module" (shared
library or executable) TOC pointer. Maintaining this pointer requires
cooperation between codegen and the system linker:
* Non-leaf functions leave space on the stack at r1+24 to save the TOC pointer.
* A call to a function that *might* have to go via a PLT stub must be followed
by a nop instruction that the system linker can replace with "ld r2, 24(r1)"
to restore the TOC pointer (only when dynamically linking Go code).
* When calling a function via a function pointer, the address of the function
must be in r12, and the first couple of instructions (the "global entry
point") of the called function use this to derive the address of the TOC
for the module it is in.
* When calling a function that is implemented in the same module, the system
linker adjusts the call to skip over the instructions mentioned above (the
"local entry point"), assuming that r2 is already correctly set.
So this changeset adds the global entry point instructions, sets the metadata so
the system linker knows where the local entry point is, inserts code to save the
TOC pointer at 24(r1), adds a nop after any call not known to be local and copes
with the odd non-local code transfer in the runtime (e.g. the stuff around
jmpdefer). It does not actually compile PIC yet.
Change-Id: I7522e22bdfd2f891745a900c60254fe9e372c854
Reviewed-on: https://go-review.googlesource.com/15967
Reviewed-by: Russ Cox <rsc@golang.org>
2015-10-16 15:42:09 +13:00
|
|
|
// On ppc64 there is an additional instruction
|
|
|
|
|
// (another no-op or reload of toc pointer) before
|
|
|
|
|
// the call.
|
|
|
|
|
prev = prev.Opt.(*obj.Prog)
|
|
|
|
|
}
|
|
|
|
|
splicebefore(lv, bb, newpcdataprog(prev, pos), prev)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
|
|
|
|
splicebefore(lv, bb, newpcdataprog(p, pos), p)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pos--
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if msg != nil {
|
2016-03-13 10:23:18 +09:00
|
|
|
for j := startmsg; j < nmsg; j++ {
|
2015-02-13 14:40:36 -05:00
|
|
|
if msg[j] != "" {
|
|
|
|
|
fmt.Printf("%s", msg[j])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
msg = nil
|
|
|
|
|
nmsg = 0
|
|
|
|
|
startmsg = 0
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-15 15:45:10 +10:00
|
|
|
flusherrors()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FNV-1 hash function constants.
//
// hashbitmap multiplies the running hash by Hp before mixing in each byte,
// and livenesscompact seeds the hash of every bitmap pair with H0.
const (
	H0 = 2166136261 // initial hash value (32-bit FNV offset basis)
	Hp = 16777619   // per-byte multiplier (32-bit FNV prime)
)
|
|
|
|
|
|
2016-04-29 14:17:04 +10:00
|
|
|
func hashbitmap(h uint32, bv bvec) uint32 {
|
2015-02-23 16:07:24 -05:00
|
|
|
n := int((bv.n + 31) / 32)
|
|
|
|
|
for i := 0; i < n; i++ {
|
2016-03-13 10:23:18 +09:00
|
|
|
w := bv.b[i]
|
2015-02-13 14:40:36 -05:00
|
|
|
h = (h * Hp) ^ (w & 0xff)
|
|
|
|
|
h = (h * Hp) ^ ((w >> 8) & 0xff)
|
|
|
|
|
h = (h * Hp) ^ ((w >> 16) & 0xff)
|
|
|
|
|
h = (h * Hp) ^ ((w >> 24) & 0xff)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return h
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compact liveness information by coalescing identical per-call-site bitmaps.
|
|
|
|
|
// The merging only happens for a single function, not across the entire binary.
|
|
|
|
|
//
|
|
|
|
|
// There are actually two lists of bitmaps, one list for the local variables and one
|
|
|
|
|
// list for the function arguments. Both lists are indexed by the same PCDATA
|
|
|
|
|
// index, so the corresponding pairs must be considered together when
|
|
|
|
|
// merging duplicates. The argument bitmaps change much less often during
|
|
|
|
|
// function execution than the local variable bitmaps, so it is possible that
|
|
|
|
|
// we could introduce a separate PCDATA index for arguments vs locals and
|
|
|
|
|
// then compact the set of argument bitmaps separately from the set of
|
|
|
|
|
// local variable bitmaps. As of 2014-04-02, doing this to the godoc binary
|
|
|
|
|
// is actually a net loss: we save about 50k of argument bitmaps but the new
|
|
|
|
|
// PCDATA tables cost about 100k. So for now we keep using a single index for
|
|
|
|
|
// both bitmap lists.
|
|
|
|
|
func livenesscompact(lv *Liveness) {
|
|
|
|
|
// Linear probing hash table of bitmaps seen so far.
|
|
|
|
|
// The hash table has 4n entries to keep the linear
|
|
|
|
|
// scan short. An entry of -1 indicates an empty slot.
|
2015-02-23 16:07:24 -05:00
|
|
|
n := len(lv.livepointers)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
tablesize := 4 * n
|
|
|
|
|
table := make([]int, tablesize)
|
2015-02-13 14:40:36 -05:00
|
|
|
for i := range table {
|
|
|
|
|
table[i] = -1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// remap[i] = the new index of the old bit vector #i.
|
2015-02-23 16:07:24 -05:00
|
|
|
remap := make([]int, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
for i := range remap {
|
|
|
|
|
remap[i] = -1
|
|
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
uniq := 0 // unique tables found so far
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Consider bit vectors in turn.
|
|
|
|
|
// If new, assign next number using uniq,
|
2016-03-13 10:23:18 +09:00
|
|
|
// record in remap, record in lv.livepointers and lv.argslivepointers
|
2015-02-13 14:40:36 -05:00
|
|
|
// under the new index, and add entry to hash table.
|
|
|
|
|
// If already seen, record earlier index in remap and free bitmaps.
|
2015-02-23 16:07:24 -05:00
|
|
|
for i := 0; i < n; i++ {
|
2016-03-13 10:23:18 +09:00
|
|
|
local := lv.livepointers[i]
|
|
|
|
|
arg := lv.argslivepointers[i]
|
|
|
|
|
h := hashbitmap(hashbitmap(H0, local), arg) % uint32(tablesize)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
for {
|
2016-03-13 10:23:18 +09:00
|
|
|
j := table[h]
|
2015-02-13 14:40:36 -05:00
|
|
|
if j < 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
jlocal := lv.livepointers[j]
|
|
|
|
|
jarg := lv.argslivepointers[j]
|
2016-10-04 15:57:24 -07:00
|
|
|
if local.Eq(jlocal) && arg.Eq(jarg) {
|
2015-02-13 14:40:36 -05:00
|
|
|
remap[i] = j
|
|
|
|
|
goto Next
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h++
|
|
|
|
|
if h == uint32(tablesize) {
|
|
|
|
|
h = 0
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
table[h] = uniq
|
|
|
|
|
remap[i] = uniq
|
|
|
|
|
lv.livepointers[uniq] = local
|
|
|
|
|
lv.argslivepointers[uniq] = arg
|
|
|
|
|
uniq++
|
|
|
|
|
Next:
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// We've already reordered lv.livepointers[0:uniq]
|
|
|
|
|
// and lv.argslivepointers[0:uniq] and freed the bitmaps
|
2015-02-13 14:40:36 -05:00
|
|
|
// we don't need anymore. Clear the pointers later in the
|
|
|
|
|
// array so that we can tell where the coalesced bitmaps stop
|
|
|
|
|
// and so that we don't double-free when cleaning up.
|
2015-02-23 16:07:24 -05:00
|
|
|
for j := uniq; j < n; j++ {
|
2016-04-29 14:17:04 +10:00
|
|
|
lv.livepointers[j] = bvec{}
|
|
|
|
|
lv.argslivepointers[j] = bvec{}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Rewrite PCDATA instructions to use new numbering.
|
2015-02-23 16:07:24 -05:00
|
|
|
for p := lv.ptxt; p != nil; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
if p.As == obj.APCDATA && p.From.Offset == obj.PCDATA_StackMapIndex {
|
2016-03-13 10:23:18 +09:00
|
|
|
i := p.To.Offset
|
2015-02-13 14:40:36 -05:00
|
|
|
if i >= 0 {
|
|
|
|
|
p.To.Offset = int64(remap[i])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-29 14:17:04 +10:00
|
|
|
func printbitset(printed bool, name string, vars []*Node, bits bvec) bool {
|
2016-03-13 10:23:18 +09:00
|
|
|
started := false
|
2015-03-02 21:25:33 -05:00
|
|
|
for i, n := range vars {
|
2016-10-04 15:57:24 -07:00
|
|
|
if !bits.Get(int32(i)) {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
if !started {
|
|
|
|
|
if !printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\t")
|
|
|
|
|
} else {
|
|
|
|
|
fmt.Printf(" ")
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
started = true
|
|
|
|
|
printed = true
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("%s=", name)
|
|
|
|
|
} else {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("%s", n.Sym.Name)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return printed
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Prints the computed liveness information and inputs, for debugging.
|
|
|
|
|
// This format synthesizes the information used during the multiple passes
|
|
|
|
|
// into a single presentation.
|
|
|
|
|
func livenessprintdebug(lv *Liveness) {
|
2015-05-27 10:42:55 -04:00
|
|
|
fmt.Printf("liveness: %s\n", Curfn.Func.Nname.Sym.Name)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
pcdata := 0
|
2015-03-02 21:25:33 -05:00
|
|
|
for i, bb := range lv.cfg {
|
2015-02-13 14:40:36 -05:00
|
|
|
if i > 0 {
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bb#0 pred=1,2 succ=3,4
|
|
|
|
|
fmt.Printf("bb#%d pred=", i)
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
for j := 0; j < len(bb.pred); j++ {
|
2015-02-13 14:40:36 -05:00
|
|
|
if j > 0 {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("%d", (bb.pred[j]).rpo)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf(" succ=")
|
2016-03-13 10:23:18 +09:00
|
|
|
for j := 0; j < len(bb.succ); j++ {
|
2015-02-13 14:40:36 -05:00
|
|
|
if j > 0 {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("%d", (bb.succ[j]).rpo)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
|
|
|
|
|
// initial settings
|
2016-03-13 10:23:18 +09:00
|
|
|
var printed bool
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
printed = printbitset(printed, "uevar", lv.vars, bb.uevar)
|
|
|
|
|
printed = printbitset(printed, "livein", lv.vars, bb.livein)
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// program listing, with individual effects listed
|
2016-03-13 10:23:18 +09:00
|
|
|
for p := bb.first; ; p = p.Link {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("%v\n", p)
|
|
|
|
|
if p.As == obj.APCDATA && p.From.Offset == obj.PCDATA_StackMapIndex {
|
|
|
|
|
pcdata = int(p.To.Offset)
|
|
|
|
|
}
|
2017-01-14 23:43:26 -08:00
|
|
|
uevar, varkill, avarinit := lv.progeffects(p)
|
2016-03-13 10:23:18 +09:00
|
|
|
printed = false
|
2017-01-14 23:43:26 -08:00
|
|
|
printed = printbitset(printed, "uevar", lv.vars, lv.slice2bvec(uevar))
|
|
|
|
|
printed = printbitset(printed, "varkill", lv.vars, lv.slice2bvec(varkill))
|
|
|
|
|
printed = printbitset(printed, "avarinit", lv.vars, lv.slice2bvec(avarinit))
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
if issafepoint(p) {
|
2016-03-13 10:23:18 +09:00
|
|
|
args := lv.argslivepointers[pcdata]
|
|
|
|
|
locals := lv.livepointers[pcdata]
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\tlive=")
|
2016-03-13 10:23:18 +09:00
|
|
|
printed = false
|
|
|
|
|
for j := 0; j < len(lv.vars); j++ {
|
|
|
|
|
n := lv.vars[j]
|
2015-02-17 22:13:49 -05:00
|
|
|
if islive(n, args, locals) {
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
2015-04-17 12:03:22 -04:00
|
|
|
fmt.Printf("%v", n)
|
2016-03-13 10:23:18 +09:00
|
|
|
printed = true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if p == bb.last {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bb bitsets
|
|
|
|
|
fmt.Printf("end\n")
|
|
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
printed = printbitset(printed, "varkill", lv.vars, bb.varkill)
|
|
|
|
|
printed = printbitset(printed, "liveout", lv.vars, bb.liveout)
|
|
|
|
|
printed = printbitset(printed, "avarinit", lv.vars, bb.avarinit)
|
|
|
|
|
printed = printbitset(printed, "avarinitany", lv.vars, bb.avarinitany)
|
|
|
|
|
printed = printbitset(printed, "avarinitall", lv.vars, bb.avarinitall)
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 10:23:18 +09:00
|
|
|
// Dumps a slice of bitmaps to a symbol as a sequence of uint32 values. The
|
2016-03-01 23:21:55 +00:00
|
|
|
// first word dumped is the total number of bitmaps. The second word is the
|
|
|
|
|
// length of the bitmaps. All bitmaps are assumed to be of equal length. The
|
2016-10-11 10:23:14 -07:00
|
|
|
// remaining bytes are the raw bitmaps.
|
2016-04-29 14:17:04 +10:00
|
|
|
func onebitwritesymbol(arr []bvec, sym *Sym) {
|
2016-03-13 10:23:18 +09:00
|
|
|
off := 4 // number of bitmaps, to fill in later
|
|
|
|
|
off = duint32(sym, off, uint32(arr[0].n)) // number of bits in each bitmap
|
2015-02-13 14:40:36 -05:00
|
|
|
var i int
|
2016-03-13 10:23:18 +09:00
|
|
|
for i = 0; i < len(arr); i++ {
|
2015-02-13 14:40:36 -05:00
|
|
|
// bitmap words
|
2016-03-13 10:23:18 +09:00
|
|
|
bv := arr[i]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-02 21:25:33 -05:00
|
|
|
if bv.b == nil {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2016-10-11 10:23:14 -07:00
|
|
|
off = dbvec(sym, off, bv)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
duint32(sym, 0, uint32(i)) // number of bitmaps
|
2016-03-28 22:27:36 +13:00
|
|
|
ls := Linksym(sym)
|
|
|
|
|
ls.Name = fmt.Sprintf("gclocals·%x", md5.Sum(ls.P))
|
2016-10-24 23:15:41 +03:00
|
|
|
ls.Set(obj.AttrDuplicateOK, true)
|
2016-04-29 09:02:27 -07:00
|
|
|
sv := obj.SymVer{Name: ls.Name, Version: 0}
|
2016-03-28 22:27:36 +13:00
|
|
|
ls2, ok := Ctxt.Hash[sv]
|
|
|
|
|
if ok {
|
|
|
|
|
sym.Lsym = ls2
|
|
|
|
|
} else {
|
|
|
|
|
Ctxt.Hash[sv] = ls
|
|
|
|
|
ggloblsym(sym, int32(off), obj.RODATA)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func printprog(p *obj.Prog) {
|
|
|
|
|
for p != nil {
|
|
|
|
|
fmt.Printf("%v\n", p)
|
|
|
|
|
p = p.Link
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Entry point for liveness analysis. Constructs a complete CFG, solves for
|
2015-02-13 14:40:36 -05:00
|
|
|
// the liveness of pointer variables in the function, and emits a runtime data
|
|
|
|
|
// structure read by the garbage collector.
|
|
|
|
|
func liveness(fn *Node, firstp *obj.Prog, argssym *Sym, livesym *Sym) {
|
|
|
|
|
// Change name to dump debugging information only for a specific function.
|
2015-02-23 16:07:24 -05:00
|
|
|
debugdelta := 0
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-05-27 10:42:55 -04:00
|
|
|
if Curfn.Func.Nname.Sym.Name == "!" {
|
2015-02-13 14:40:36 -05:00
|
|
|
debugdelta = 2
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
debuglive += debugdelta
|
|
|
|
|
if debuglive >= 3 {
|
2015-05-27 10:42:55 -04:00
|
|
|
fmt.Printf("liveness: %s\n", Curfn.Func.Nname.Sym.Name)
|
2015-02-13 14:40:36 -05:00
|
|
|
printprog(firstp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
checkptxt(fn, firstp)
|
|
|
|
|
|
|
|
|
|
// Construct the global liveness state.
|
2015-02-23 16:07:24 -05:00
|
|
|
cfg := newcfg(firstp)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
if debuglive >= 3 {
|
2016-03-03 15:49:04 -08:00
|
|
|
printcfg(cfg)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
vars := getvariables(fn)
|
|
|
|
|
lv := newliveness(fn, firstp, cfg, vars)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Run the dataflow framework.
|
|
|
|
|
livenessprologue(lv)
|
|
|
|
|
|
|
|
|
|
if debuglive >= 3 {
|
|
|
|
|
livenessprintcfg(lv)
|
|
|
|
|
}
|
|
|
|
|
livenesssolve(lv)
|
|
|
|
|
if debuglive >= 3 {
|
|
|
|
|
livenessprintcfg(lv)
|
|
|
|
|
}
|
|
|
|
|
livenessepilogue(lv)
|
|
|
|
|
if debuglive >= 3 {
|
|
|
|
|
livenessprintcfg(lv)
|
|
|
|
|
}
|
|
|
|
|
livenesscompact(lv)
|
|
|
|
|
|
|
|
|
|
if debuglive >= 2 {
|
|
|
|
|
livenessprintdebug(lv)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Emit the live pointer map data structures
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwritesymbol(lv.livepointers, livesym)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwritesymbol(lv.argslivepointers, argssym)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Free everything.
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
|
|
|
|
if ln != nil {
|
|
|
|
|
ln.SetOpt(nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-03 15:49:04 -08:00
|
|
|
freecfg(cfg)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
debuglive -= debugdelta
|
|
|
|
|
}
|