2015-02-13 14:40:36 -05:00
|
|
|
// Copyright 2013 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
2015-03-05 13:57:36 -05:00
|
|
|
// Garbage collector liveness bitmap generation.
|
|
|
|
|
|
|
|
|
|
// The command line flag -live causes this code to print debug information.
|
|
|
|
|
// The levels are:
|
|
|
|
|
//
|
|
|
|
|
// -live (aka -live=1): print liveness lists as code warnings at safe points
|
|
|
|
|
// -live=2: print an assembly listing with liveness annotations
|
|
|
|
|
//
|
|
|
|
|
// Each level includes the earlier output as well.
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import (
|
2017-03-09 18:32:17 -08:00
|
|
|
"cmd/compile/internal/ssa"
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
"cmd/compile/internal/types"
|
2015-02-13 14:40:36 -05:00
|
|
|
"cmd/internal/obj"
|
2016-06-08 22:02:08 -07:00
|
|
|
"cmd/internal/objabi"
|
|
|
|
|
"cmd/internal/src"
|
2016-03-28 22:27:36 +13:00
|
|
|
"crypto/md5"
|
2016-06-08 22:02:08 -07:00
|
|
|
"crypto/sha1"
|
2015-02-13 14:40:36 -05:00
|
|
|
"fmt"
|
2016-06-08 22:02:08 -07:00
|
|
|
"os"
|
2015-10-28 10:40:47 -07:00
|
|
|
"strings"
|
2015-02-13 14:40:36 -05:00
|
|
|
)
|
|
|
|
|
|
2017-04-14 06:43:01 -07:00
|
|
|
// TODO(mdempsky): Update to reference OpVar{Def,Kill,Live} instead.
|
|
|
|
|
|
|
|
|
|
// VARDEF is an annotation for the liveness analysis, marking a place
|
|
|
|
|
// where a complete initialization (definition) of a variable begins.
|
|
|
|
|
// Since the liveness analysis can see initialization of single-word
|
|
|
|
|
// variables quite easily, gvardef is usually only called for multi-word
|
|
|
|
|
// or 'fat' variables, those satisfying isfat(n.Type).
|
|
|
|
|
// However, gvardef is also called when a non-fat variable is initialized
|
|
|
|
|
// via a block move; the only time this happens is when you have
|
|
|
|
|
// return f()
|
|
|
|
|
// for a function with multiple return values exactly matching the return
|
|
|
|
|
// types of the current function.
|
|
|
|
|
//
|
|
|
|
|
// A 'VARDEF x' annotation in the instruction stream tells the liveness
|
|
|
|
|
// analysis to behave as though the variable x is being initialized at that
|
|
|
|
|
// point in the instruction stream. The VARDEF must appear before the
|
|
|
|
|
// actual (multi-instruction) initialization, and it must also appear after
|
|
|
|
|
// any uses of the previous value, if any. For example, if compiling:
|
|
|
|
|
//
|
|
|
|
|
// x = x[1:]
|
|
|
|
|
//
|
|
|
|
|
// it is important to generate code like:
|
|
|
|
|
//
|
|
|
|
|
// base, len, cap = pieces of x[1:]
|
|
|
|
|
// VARDEF x
|
|
|
|
|
// x = {base, len, cap}
|
|
|
|
|
//
|
|
|
|
|
// If instead the generated code looked like:
|
|
|
|
|
//
|
|
|
|
|
// VARDEF x
|
|
|
|
|
// base, len, cap = pieces of x[1:]
|
|
|
|
|
// x = {base, len, cap}
|
|
|
|
|
//
|
|
|
|
|
// then the liveness analysis would decide the previous value of x was
|
|
|
|
|
// unnecessary even though it is about to be used by the x[1:] computation.
|
|
|
|
|
// Similarly, if the generated code looked like:
|
|
|
|
|
//
|
|
|
|
|
// base, len, cap = pieces of x[1:]
|
|
|
|
|
// x = {base, len, cap}
|
|
|
|
|
// VARDEF x
|
|
|
|
|
//
|
|
|
|
|
// then the liveness analysis will not preserve the new value of x, because
|
|
|
|
|
// the VARDEF appears to have "overwritten" it.
|
|
|
|
|
//
|
|
|
|
|
// VARDEF is a bit of a kludge to work around the fact that the instruction
|
|
|
|
|
// stream is working on single-word values but the liveness analysis
|
|
|
|
|
// wants to work on individual variables, which might be multi-word
|
|
|
|
|
// aggregates. It might make sense at some point to look into letting
|
|
|
|
|
// the liveness analysis work on single-word values as well, although
|
|
|
|
|
// there are complications around interface values, slices, and strings,
|
|
|
|
|
// all of which cannot be treated as individual words.
|
|
|
|
|
//
|
|
|
|
|
// VARKILL is the opposite of VARDEF: it marks a value as no longer needed,
|
|
|
|
|
// even if its address has been taken. That is, a VARKILL annotation asserts
|
|
|
|
|
// that its argument is certainly dead, for use when the liveness analysis
|
|
|
|
|
// would not otherwise be able to deduce that fact.
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// BlockEffects summarizes the liveness effects on an SSA block.
|
|
|
|
|
type BlockEffects struct {
|
|
|
|
|
lastbitmapindex int // for livenessepilogue
|
2015-03-05 13:57:36 -05:00
|
|
|
|
|
|
|
|
// Computed during livenessprologue using only the content of
|
|
|
|
|
// individual blocks:
|
|
|
|
|
//
|
|
|
|
|
// uevar: upward exposed variables (used before set in block)
|
|
|
|
|
// varkill: killed variables (set in block)
|
|
|
|
|
// avarinit: addrtaken variables set or used (proof of initialization)
|
2016-04-29 14:17:04 +10:00
|
|
|
uevar bvec
|
|
|
|
|
varkill bvec
|
|
|
|
|
avarinit bvec
|
2015-03-05 13:57:36 -05:00
|
|
|
|
|
|
|
|
// Computed during livenesssolve using control flow information:
|
|
|
|
|
//
|
|
|
|
|
// livein: variables live at block entry
|
|
|
|
|
// liveout: variables live at block exit
|
|
|
|
|
// avarinitany: addrtaken variables possibly initialized at block exit
|
|
|
|
|
// (initialized in block or at exit from any predecessor block)
|
|
|
|
|
// avarinitall: addrtaken variables certainly initialized at block exit
|
|
|
|
|
// (initialized in block or at exit from all predecessor blocks)
|
2016-04-29 14:17:04 +10:00
|
|
|
livein bvec
|
|
|
|
|
liveout bvec
|
|
|
|
|
avarinitany bvec
|
|
|
|
|
avarinitall bvec
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A collection of global state used by liveness analysis.
|
|
|
|
|
type Liveness struct {
|
2017-03-17 09:19:56 -07:00
|
|
|
fn *Node
|
2017-03-09 18:32:17 -08:00
|
|
|
f *ssa.Func
|
2017-03-17 09:19:56 -07:00
|
|
|
vars []*Node
|
|
|
|
|
stkptrsize int64
|
2015-03-05 13:57:36 -05:00
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
be []BlockEffects
|
|
|
|
|
|
|
|
|
|
// stackMapIndex maps from safe points (i.e., CALLs) to their
|
|
|
|
|
// index within the stack maps.
|
|
|
|
|
stackMapIndex map[*ssa.Value]int
|
|
|
|
|
|
2016-06-08 22:02:08 -07:00
|
|
|
// An array with a bit vector for each safe point tracking live variables.
|
2017-02-24 16:02:31 -08:00
|
|
|
livevars []bvec
|
2017-01-14 23:43:26 -08:00
|
|
|
|
|
|
|
|
cache progeffectscache
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// progeffectscache stores lists of indices into Liveness.vars that are
// computed once by initcache.
type progeffectscache struct {
	// textavarinit lists the PPARAM variables whose address is taken.
	textavarinit []int32
	// retuevar lists the PPARAMOUT variables whose address is not taken.
	retuevar []int32
	// tailuevar lists every PPARAM variable.
	tailuevar []int32
	// initialized records that initcache has already run.
	initialized bool
}
|
|
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
// livenessShouldTrack reports whether the liveness analysis
|
|
|
|
|
// should track the variable n.
|
|
|
|
|
// We don't care about variables that have no pointers,
|
|
|
|
|
// nor do we care about non-local variables,
|
|
|
|
|
// nor do we care about empty structs (handled by the pointer check),
|
|
|
|
|
// nor do we care about the fake PAUTOHEAP variables.
|
|
|
|
|
func livenessShouldTrack(n *Node) bool {
|
2017-04-25 18:14:12 -07:00
|
|
|
return n.Op == ONAME && (n.Class() == PAUTO || n.Class() == PPARAM || n.Class() == PPARAMOUT) && types.Haspointers(n.Type)
|
2016-05-25 10:01:58 -04:00
|
|
|
}
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
// getvariables returns the list of on-stack variables that we need to track.
|
|
|
|
|
func getvariables(fn *Node) []*Node {
|
|
|
|
|
var vars []*Node
|
|
|
|
|
for _, n := range fn.Func.Dcl {
|
|
|
|
|
if n.Op == ONAME {
|
2015-02-13 14:40:36 -05:00
|
|
|
// The Node.opt field is available for use by optimization passes.
|
2016-05-25 10:01:58 -04:00
|
|
|
// We use it to hold the index of the node in the variables array
|
|
|
|
|
// (nil means the Node is not in the variables array).
|
2015-02-13 14:40:36 -05:00
|
|
|
// The Node.curfn field is supposed to be set to the current function
|
|
|
|
|
// already, but for some compiler-introduced names it seems not to be,
|
|
|
|
|
// so fix that here.
|
|
|
|
|
// Later, when we want to find the index of a node in the variables list,
|
2017-03-22 20:28:12 -07:00
|
|
|
// we will check that n.Curfn == lv.fn and n.Opt() != nil. Then n.Opt().(int32)
|
2015-02-13 14:40:36 -05:00
|
|
|
// is the index in the variables list.
|
2016-05-25 10:01:58 -04:00
|
|
|
n.SetOpt(nil)
|
2017-03-22 20:28:12 -07:00
|
|
|
n.Name.Curfn = fn
|
2016-05-25 10:01:58 -04:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
if livenessShouldTrack(n) {
|
|
|
|
|
n.SetOpt(int32(len(vars)))
|
|
|
|
|
vars = append(vars, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
return vars
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-01-14 23:43:26 -08:00
|
|
|
func (lv *Liveness) initcache() {
|
|
|
|
|
if lv.cache.initialized {
|
|
|
|
|
Fatalf("liveness cache initialized twice")
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
lv.cache.initialized = true
|
|
|
|
|
|
|
|
|
|
for i, node := range lv.vars {
|
2017-04-25 18:14:12 -07:00
|
|
|
switch node.Class() {
|
2017-01-14 23:43:26 -08:00
|
|
|
case PPARAM:
|
|
|
|
|
// A return instruction with a p.to is a tail return, which brings
|
|
|
|
|
// the stack pointer back up (if it ever went down) and then jumps
|
|
|
|
|
// to a new function entirely. That form of instruction must read
|
|
|
|
|
// all the parameters for correctness, and similarly it must not
|
|
|
|
|
// read the out arguments - they won't be set until the new
|
|
|
|
|
// function runs.
|
2017-03-09 12:15:41 -08:00
|
|
|
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.cache.tailuevar = append(lv.cache.tailuevar, int32(i))
|
|
|
|
|
|
2017-02-27 19:56:38 +02:00
|
|
|
if node.Addrtaken() {
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.cache.textavarinit = append(lv.cache.textavarinit, int32(i))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case PPARAMOUT:
|
|
|
|
|
// If the result had its address taken, it is being tracked
|
|
|
|
|
// by the avarinit code, which does not use uevar.
|
|
|
|
|
// If we added it to uevar too, we'd not see any kill
|
|
|
|
|
// and decide that the variable was live entry, which it is not.
|
|
|
|
|
// So only use uevar in the non-addrtaken case.
|
|
|
|
|
// The p.to.type == obj.TYPE_NONE limits the bvset to
|
|
|
|
|
// non-tail-call return instructions; see note below for details.
|
2017-02-27 19:56:38 +02:00
|
|
|
if !node.Addrtaken() {
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.cache.retuevar = append(lv.cache.retuevar, int32(i))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// liveEffect is a bit set describing how a single instruction affects
// the liveness of one variable.
//
// The flags are:
//
//	uevar    - the instruction uses the variable
//	varkill  - the instruction kills the variable:
//	           for a variable whose address is not taken, the variable was set;
//	           for a variable whose address is taken, the variable was marked dead
//	avarinit - the instruction initializes or refers to the variable;
//	           only used for variables whose address is taken but which
//	           do not escape to the heap
//
// avarinit doubles as evidence that the data has been initialized,
// since any use of a variable has to come after its initialization.
type liveEffect int

const (
	uevar    liveEffect = 1 << 0
	varkill  liveEffect = 1 << 1
	avarinit liveEffect = 1 << 2
)
|
|
|
|
|
|
|
|
|
|
// valueEffects returns the index of a variable in lv.vars and the
|
|
|
|
|
// liveness effects v has on that variable.
|
|
|
|
|
// If v does not affect any tracked variables, it returns -1, 0.
|
|
|
|
|
func (lv *Liveness) valueEffects(v *ssa.Value) (pos int32, effect liveEffect) {
|
|
|
|
|
n, e := affectedNode(v)
|
|
|
|
|
if e == 0 {
|
|
|
|
|
return -1, 0
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-21 10:15:14 -07:00
|
|
|
// AllocFrame has dropped unused variables from
|
|
|
|
|
// lv.fn.Func.Dcl, but they might still be referenced by
|
|
|
|
|
// OpVarFoo pseudo-ops. Ignore them to prevent "lost track of
|
|
|
|
|
// variable" ICEs (issue 19632).
|
|
|
|
|
switch v.Op {
|
|
|
|
|
case ssa.OpVarDef, ssa.OpVarKill, ssa.OpVarLive, ssa.OpKeepAlive:
|
|
|
|
|
if !n.Used() {
|
|
|
|
|
return -1, 0
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-22 20:28:12 -07:00
|
|
|
pos = lv.liveIndex(n)
|
2017-03-09 18:32:17 -08:00
|
|
|
if pos < 0 {
|
|
|
|
|
return -1, 0
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
if n.Addrtaken() {
|
|
|
|
|
if v.Op != ssa.OpVarKill {
|
|
|
|
|
effect |= avarinit
|
|
|
|
|
}
|
|
|
|
|
if v.Op == ssa.OpVarDef || v.Op == ssa.OpVarKill {
|
|
|
|
|
effect |= varkill
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// Read is a read, obviously.
|
|
|
|
|
// Addr by itself is also implicitly a read.
|
|
|
|
|
//
|
|
|
|
|
// Addr|Write means that the address is being taken
|
|
|
|
|
// but only so that the instruction can write to the value.
|
|
|
|
|
// It is not a read.
|
|
|
|
|
|
|
|
|
|
if e&ssa.SymRead != 0 || e&(ssa.SymAddr|ssa.SymWrite) == ssa.SymAddr {
|
|
|
|
|
effect |= uevar
|
|
|
|
|
}
|
|
|
|
|
if e&ssa.SymWrite != 0 && (!isfat(n.Type) || v.Op == ssa.OpVarDef) {
|
|
|
|
|
effect |= varkill
|
2016-09-16 15:02:47 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// affectedNode returns the *Node affected by v
|
|
|
|
|
func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
|
|
|
|
|
// Special cases.
|
|
|
|
|
switch v.Op {
|
|
|
|
|
case ssa.OpLoadReg:
|
|
|
|
|
n, _ := AutoVar(v.Args[0])
|
|
|
|
|
return n, ssa.SymRead
|
|
|
|
|
case ssa.OpStoreReg:
|
|
|
|
|
n, _ := AutoVar(v)
|
|
|
|
|
return n, ssa.SymWrite
|
|
|
|
|
|
|
|
|
|
case ssa.OpVarLive:
|
|
|
|
|
return v.Aux.(*Node), ssa.SymRead
|
|
|
|
|
case ssa.OpVarDef, ssa.OpVarKill:
|
|
|
|
|
return v.Aux.(*Node), ssa.SymWrite
|
|
|
|
|
case ssa.OpKeepAlive:
|
|
|
|
|
n, _ := AutoVar(v.Args[0])
|
|
|
|
|
return n, ssa.SymRead
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
e := v.Op.SymEffect()
|
|
|
|
|
if e == 0 {
|
|
|
|
|
return nil, 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n *Node
|
|
|
|
|
switch a := v.Aux.(type) {
|
|
|
|
|
case nil, *ssa.ExternSymbol:
|
|
|
|
|
// ok, but no node
|
|
|
|
|
case *ssa.ArgSymbol:
|
|
|
|
|
n = a.Node.(*Node)
|
|
|
|
|
case *ssa.AutoSymbol:
|
|
|
|
|
n = a.Node.(*Node)
|
|
|
|
|
default:
|
|
|
|
|
Fatalf("weird aux: %s", v.LongString())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-01-14 23:43:26 -08:00
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
return n, e
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:01:58 -04:00
|
|
|
// liveIndex returns the index of n in the set of tracked vars.
|
|
|
|
|
// If n is not a tracked var, liveIndex returns -1.
|
|
|
|
|
// If n is not a tracked var but should be tracked, liveIndex crashes.
|
2017-03-22 20:28:12 -07:00
|
|
|
func (lv *Liveness) liveIndex(n *Node) int32 {
|
|
|
|
|
if n == nil || n.Name.Curfn != lv.fn || !livenessShouldTrack(n) {
|
2016-05-25 10:01:58 -04:00
|
|
|
return -1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pos, ok := n.Opt().(int32) // index in vars
|
|
|
|
|
if !ok {
|
|
|
|
|
Fatalf("lost track of variable in liveness: %v (%p, %p)", n, n, n.Orig)
|
|
|
|
|
}
|
2017-03-22 20:28:12 -07:00
|
|
|
if pos >= int32(len(lv.vars)) || lv.vars[pos] != n {
|
2016-05-25 10:01:58 -04:00
|
|
|
Fatalf("bad bookkeeping in liveness: %v (%p, %p)", n, n, n.Orig)
|
|
|
|
|
}
|
|
|
|
|
return pos
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Constructs a new liveness structure used to hold the global state of the
|
2016-03-13 10:23:18 +09:00
|
|
|
// liveness computation. The cfg argument is a slice of *BasicBlocks and the
|
|
|
|
|
// vars argument is a slice of *Nodes.
|
2017-03-09 18:32:17 -08:00
|
|
|
func newliveness(fn *Node, f *ssa.Func, vars []*Node, stkptrsize int64) *Liveness {
|
|
|
|
|
lv := &Liveness{
|
2017-03-17 09:19:56 -07:00
|
|
|
fn: fn,
|
2017-03-09 18:32:17 -08:00
|
|
|
f: f,
|
2017-03-17 09:19:56 -07:00
|
|
|
vars: vars,
|
|
|
|
|
stkptrsize: stkptrsize,
|
2017-03-09 18:32:17 -08:00
|
|
|
be: make([]BlockEffects, f.NumBlocks()),
|
2016-03-13 10:23:18 +09:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
nblocks := int32(len(f.Blocks))
|
2015-02-23 16:07:24 -05:00
|
|
|
nvars := int32(len(vars))
|
2015-03-02 21:25:33 -05:00
|
|
|
bulk := bvbulkalloc(nvars, nblocks*7)
|
2017-03-09 18:32:17 -08:00
|
|
|
for _, b := range f.Blocks {
|
|
|
|
|
be := lv.blockEffects(b)
|
|
|
|
|
|
|
|
|
|
be.uevar = bulk.next()
|
|
|
|
|
be.varkill = bulk.next()
|
|
|
|
|
be.livein = bulk.next()
|
|
|
|
|
be.liveout = bulk.next()
|
|
|
|
|
be.avarinit = bulk.next()
|
|
|
|
|
be.avarinitany = bulk.next()
|
|
|
|
|
be.avarinitall = bulk.next()
|
|
|
|
|
}
|
|
|
|
|
return lv
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
func (lv *Liveness) blockEffects(b *ssa.Block) *BlockEffects {
|
|
|
|
|
return &lv.be[b.ID]
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NOTE: The bitmap for a specific type t should be cached in t after the first run
|
|
|
|
|
// and then simply copied into bv at the correct offset on future calls with
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, onebitwalktype1
|
2015-02-13 14:40:36 -05:00
|
|
|
// accounts for 40% of the 6g execution time.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func onebitwalktype1(t *types.Type, xoffset *int64, bv bvec) {
|
2015-02-13 14:40:36 -05:00
|
|
|
if t.Align > 0 && *xoffset&int64(t.Align-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid initial alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch t.Etype {
|
|
|
|
|
case TINT8,
|
|
|
|
|
TUINT8,
|
|
|
|
|
TINT16,
|
|
|
|
|
TUINT16,
|
|
|
|
|
TINT32,
|
|
|
|
|
TUINT32,
|
|
|
|
|
TINT64,
|
|
|
|
|
TUINT64,
|
|
|
|
|
TINT,
|
|
|
|
|
TUINT,
|
|
|
|
|
TUINTPTR,
|
|
|
|
|
TBOOL,
|
|
|
|
|
TFLOAT32,
|
|
|
|
|
TFLOAT64,
|
|
|
|
|
TCOMPLEX64,
|
|
|
|
|
TCOMPLEX128:
|
|
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
|
|
|
|
case TPTR32,
|
|
|
|
|
TPTR64,
|
|
|
|
|
TUNSAFEPTR,
|
|
|
|
|
TFUNC,
|
|
|
|
|
TCHAN,
|
|
|
|
|
TMAP:
|
|
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
|
|
|
|
case TSTRING:
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
// struct { byte *str; intgo len; }
|
2015-02-13 14:40:36 -05:00
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) //pointer in first slot
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
|
|
|
|
case TINTER:
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
// struct { Itab *tab; void *data; }
|
|
|
|
|
// or, when isnilinter(t)==true:
|
|
|
|
|
// struct { Type *type; void *data; }
|
2015-02-13 14:40:36 -05:00
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: invalid alignment, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer in first slot
|
|
|
|
|
bv.Set(int32(*xoffset/int64(Widthptr) + 1)) // pointer in second slot
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
2016-04-18 14:02:08 -07:00
|
|
|
case TSLICE:
|
|
|
|
|
// struct { byte *array; uintgo len; uintgo cap; }
|
|
|
|
|
if *xoffset&int64(Widthptr-1) != 0 {
|
|
|
|
|
Fatalf("onebitwalktype1: invalid TARRAY alignment, %v", t)
|
|
|
|
|
}
|
2016-10-04 15:57:24 -07:00
|
|
|
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer in first slot (BitsPointer)
|
2016-04-18 14:02:08 -07:00
|
|
|
*xoffset += t.Width
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
case TARRAY:
|
2016-04-18 14:02:08 -07:00
|
|
|
for i := int64(0); i < t.NumElem(); i++ {
|
|
|
|
|
onebitwalktype1(t.Elem(), xoffset, bv)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case TSTRUCT:
|
2016-03-13 10:23:18 +09:00
|
|
|
var o int64
|
2016-03-17 01:32:18 -07:00
|
|
|
for _, t1 := range t.Fields().Slice() {
|
2016-03-28 09:40:53 -07:00
|
|
|
fieldoffset := t1.Offset
|
2015-02-13 14:40:36 -05:00
|
|
|
*xoffset += fieldoffset - o
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwalktype1(t1.Type, xoffset, bv)
|
2015-02-13 14:40:36 -05:00
|
|
|
o = fieldoffset + t1.Type.Width
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*xoffset += t.Width - o
|
|
|
|
|
|
|
|
|
|
default:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("onebitwalktype1: unexpected type, %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the number of words of local variables.
|
2017-03-17 09:19:56 -07:00
|
|
|
func localswords(lv *Liveness) int32 {
|
|
|
|
|
return int32(lv.stkptrsize / int64(Widthptr))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the number of words of in and out arguments.
|
2017-03-21 12:13:12 -07:00
|
|
|
func argswords(lv *Liveness) int32 {
|
|
|
|
|
return int32(lv.fn.Type.ArgWidth() / int64(Widthptr))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Generates live pointer value maps for arguments and local variables. The
|
|
|
|
|
// this argument and the in arguments are always assumed live. The vars
|
2016-03-13 10:23:18 +09:00
|
|
|
// argument is a slice of *Nodes.
|
2016-04-29 14:17:04 +10:00
|
|
|
func onebitlivepointermap(lv *Liveness, liveout bvec, vars []*Node, args bvec, locals bvec) {
|
2015-02-13 14:40:36 -05:00
|
|
|
var xoffset int64
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
for i := int32(0); ; i++ {
|
2016-10-04 15:57:24 -07:00
|
|
|
i = liveout.Next(i)
|
2015-02-17 22:13:49 -05:00
|
|
|
if i < 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
node := vars[i]
|
2017-04-25 18:14:12 -07:00
|
|
|
switch node.Class() {
|
2015-02-13 14:40:36 -05:00
|
|
|
case PAUTO:
|
2017-03-17 09:19:56 -07:00
|
|
|
xoffset = node.Xoffset + lv.stkptrsize
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwalktype1(node.Type, &xoffset, locals)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case PPARAM, PPARAMOUT:
|
2015-02-13 14:40:36 -05:00
|
|
|
xoffset = node.Xoffset
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
|
|
|
onebitwalktype1(node.Type, &xoffset, args)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true for instructions that are safe points that must be annotated
|
|
|
|
|
// with liveness information.
|
2017-03-09 18:32:17 -08:00
|
|
|
func issafepoint(v *ssa.Value) bool {
|
2017-04-21 06:50:02 -04:00
|
|
|
return v.Op.IsCall()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Initializes the sets for solving the live variables. Visits all the
|
2015-02-13 14:40:36 -05:00
|
|
|
// instructions in each basic block to summarizes the information at each basic
|
|
|
|
|
// block
|
|
|
|
|
func livenessprologue(lv *Liveness) {
|
2017-01-14 23:43:26 -08:00
|
|
|
lv.initcache()
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
for _, b := range lv.f.Blocks {
|
|
|
|
|
be := lv.blockEffects(b)
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Walk the block instructions backward and update the block
|
|
|
|
|
// effects with the each prog effects.
|
2017-03-09 18:32:17 -08:00
|
|
|
for j := len(b.Values) - 1; j >= 0; j-- {
|
|
|
|
|
pos, e := lv.valueEffects(b.Values[j])
|
|
|
|
|
if e&varkill != 0 {
|
|
|
|
|
be.varkill.Set(pos)
|
|
|
|
|
be.uevar.Unset(pos)
|
2017-01-14 23:43:26 -08:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
if e&uevar != 0 {
|
|
|
|
|
be.uevar.Set(pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Walk the block instructions forward to update avarinit bits.
|
|
|
|
|
// avarinit describes the effect at the end of the block, not the beginning.
|
2017-03-09 18:32:17 -08:00
|
|
|
for j := 0; j < len(b.Values); j++ {
|
|
|
|
|
pos, e := lv.valueEffects(b.Values[j])
|
|
|
|
|
if e&varkill != 0 {
|
|
|
|
|
be.avarinit.Unset(pos)
|
2017-01-14 23:43:26 -08:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
if e&avarinit != 0 {
|
|
|
|
|
be.avarinit.Set(pos)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Solve the liveness dataflow equations.
|
|
|
|
|
func livenesssolve(lv *Liveness) {
|
|
|
|
|
// These temporary bitvectors exist to avoid successive allocations and
|
|
|
|
|
// frees within the loop.
|
2015-02-23 16:07:24 -05:00
|
|
|
newlivein := bvalloc(int32(len(lv.vars)))
|
|
|
|
|
newliveout := bvalloc(int32(len(lv.vars)))
|
|
|
|
|
any := bvalloc(int32(len(lv.vars)))
|
|
|
|
|
all := bvalloc(int32(len(lv.vars)))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Push avarinitall, avarinitany forward.
|
|
|
|
|
// avarinitall says the addressed var is initialized along all paths reaching the block exit.
|
|
|
|
|
// avarinitany says the addressed var is initialized along some path reaching the block exit.
|
2017-03-09 18:32:17 -08:00
|
|
|
for _, b := range lv.f.Blocks {
|
|
|
|
|
be := lv.blockEffects(b)
|
|
|
|
|
if b == lv.f.Entry {
|
|
|
|
|
be.avarinitall.Copy(be.avarinit)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2017-03-09 18:32:17 -08:00
|
|
|
be.avarinitall.Clear()
|
|
|
|
|
be.avarinitall.Not()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
be.avarinitany.Copy(be.avarinit)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// Walk blocks in the general direction of propagation (RPO
|
|
|
|
|
// for avarinit{any,all}, and PO for live{in,out}). This
|
|
|
|
|
// improves convergence.
|
|
|
|
|
po := lv.f.Postorder()
|
|
|
|
|
|
2016-03-15 17:03:10 +11:00
|
|
|
for change := true; change; {
|
|
|
|
|
change = false
|
2017-03-09 18:32:17 -08:00
|
|
|
for i := len(po) - 1; i >= 0; i-- {
|
|
|
|
|
b := po[i]
|
|
|
|
|
be := lv.blockEffects(b)
|
|
|
|
|
lv.avarinitanyall(b, any, all)
|
|
|
|
|
|
|
|
|
|
any.AndNot(any, be.varkill)
|
|
|
|
|
all.AndNot(all, be.varkill)
|
|
|
|
|
any.Or(any, be.avarinit)
|
|
|
|
|
all.Or(all, be.avarinit)
|
|
|
|
|
if !any.Eq(be.avarinitany) {
|
2016-03-15 17:03:10 +11:00
|
|
|
change = true
|
2017-03-09 18:32:17 -08:00
|
|
|
be.avarinitany.Copy(any)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
if !all.Eq(be.avarinitall) {
|
2016-03-15 17:03:10 +11:00
|
|
|
change = true
|
2017-03-09 18:32:17 -08:00
|
|
|
be.avarinitall.Copy(all)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Iterate through the blocks in reverse round-robin fashion. A work
|
|
|
|
|
// queue might be slightly faster. As is, the number of iterations is
|
2015-02-13 14:40:36 -05:00
|
|
|
// so low that it hardly seems to be worth the complexity.
|
|
|
|
|
|
2016-03-15 17:03:10 +11:00
|
|
|
for change := true; change; {
|
|
|
|
|
change = false
|
2017-03-09 18:32:17 -08:00
|
|
|
for _, b := range po {
|
|
|
|
|
be := lv.blockEffects(b)
|
2015-03-02 21:25:33 -05:00
|
|
|
|
2016-10-04 15:57:24 -07:00
|
|
|
newliveout.Clear()
|
2017-03-09 18:32:17 -08:00
|
|
|
switch b.Kind {
|
|
|
|
|
case ssa.BlockRet:
|
|
|
|
|
for _, pos := range lv.cache.retuevar {
|
|
|
|
|
newliveout.Set(pos)
|
2017-03-09 12:15:41 -08:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
case ssa.BlockRetJmp:
|
|
|
|
|
for _, pos := range lv.cache.tailuevar {
|
|
|
|
|
newliveout.Set(pos)
|
|
|
|
|
}
|
|
|
|
|
case ssa.BlockExit:
|
|
|
|
|
// nothing to do
|
|
|
|
|
default:
|
2017-03-09 12:15:41 -08:00
|
|
|
// A variable is live on output from this block
|
|
|
|
|
// if it is live on input to some successor.
|
|
|
|
|
//
|
|
|
|
|
// out[b] = \bigcup_{s \in succ[b]} in[s]
|
2017-03-09 18:32:17 -08:00
|
|
|
newliveout.Copy(lv.blockEffects(b.Succs[0].Block()).livein)
|
|
|
|
|
for _, succ := range b.Succs[1:] {
|
|
|
|
|
newliveout.Or(newliveout, lv.blockEffects(succ.Block()).livein)
|
2017-03-09 12:15:41 -08:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
if !be.liveout.Eq(newliveout) {
|
2016-03-15 17:03:10 +11:00
|
|
|
change = true
|
2017-03-09 18:32:17 -08:00
|
|
|
be.liveout.Copy(newliveout)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A variable is live on input to this block
|
|
|
|
|
// if it is live on output from this block and
|
|
|
|
|
// not set by the code in this block.
|
|
|
|
|
//
|
|
|
|
|
// in[b] = uevar[b] \cup (out[b] \setminus varkill[b])
|
2017-03-09 18:32:17 -08:00
|
|
|
newlivein.AndNot(be.liveout, be.varkill)
|
|
|
|
|
be.livein.Or(newlivein, be.uevar)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Visits all instructions in a basic block and computes a bit vector of live
|
|
|
|
|
// variables at each safe point locations.
|
|
|
|
|
func livenessepilogue(lv *Liveness) {
|
2015-02-23 16:07:24 -05:00
|
|
|
nvars := int32(len(lv.vars))
|
|
|
|
|
liveout := bvalloc(nvars)
|
|
|
|
|
any := bvalloc(nvars)
|
|
|
|
|
all := bvalloc(nvars)
|
2017-02-24 16:02:31 -08:00
|
|
|
livedefer := bvalloc(nvars) // always-live variables
|
2017-01-30 14:55:12 -08:00
|
|
|
|
|
|
|
|
// If there is a defer (that could recover), then all output
|
|
|
|
|
// parameters are live all the time. In addition, any locals
|
|
|
|
|
// that are pointers to heap-allocated output parameters are
|
|
|
|
|
// also always live (post-deferreturn code needs these
|
|
|
|
|
// pointers to copy values back to the stack).
|
|
|
|
|
// TODO: if the output parameter is heap-allocated, then we
|
|
|
|
|
// don't need to keep the stack copy live?
|
2017-03-15 22:55:21 -07:00
|
|
|
if lv.fn.Func.HasDefer() {
|
2017-02-24 16:02:31 -08:00
|
|
|
for i, n := range lv.vars {
|
2017-04-25 18:14:12 -07:00
|
|
|
if n.Class() == PPARAMOUT {
|
2017-01-30 14:55:12 -08:00
|
|
|
if n.IsOutputParamHeapAddr() {
|
2017-03-09 10:38:45 -08:00
|
|
|
// Just to be paranoid. Heap addresses are PAUTOs.
|
2017-01-30 14:55:12 -08:00
|
|
|
Fatalf("variable %v both output param and heap output param", n)
|
|
|
|
|
}
|
2017-03-09 10:38:45 -08:00
|
|
|
if n.Name.Param.Heapaddr != nil {
|
|
|
|
|
// If this variable moved to the heap, then
|
|
|
|
|
// its stack copy is not live.
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
// Note: zeroing is handled by zeroResults in walk.go.
|
2017-02-24 16:02:31 -08:00
|
|
|
livedefer.Set(int32(i))
|
2017-01-30 14:55:12 -08:00
|
|
|
}
|
2016-06-18 19:40:57 -07:00
|
|
|
if n.IsOutputParamHeapAddr() {
|
2017-02-27 19:56:38 +02:00
|
|
|
n.Name.SetNeedzero(true)
|
2017-02-24 16:02:31 -08:00
|
|
|
livedefer.Set(int32(i))
|
2016-06-18 19:40:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
{
|
|
|
|
|
// Reserve an entry for function entry.
|
|
|
|
|
live := bvalloc(nvars)
|
|
|
|
|
for _, pos := range lv.cache.textavarinit {
|
|
|
|
|
live.Set(pos)
|
|
|
|
|
}
|
|
|
|
|
lv.livevars = append(lv.livevars, live)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, b := range lv.f.Blocks {
|
|
|
|
|
be := lv.blockEffects(b)
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Compute avarinitany and avarinitall for entry to block.
|
|
|
|
|
// This duplicates information known during livenesssolve
|
|
|
|
|
// but avoids storing two more vectors for each block.
|
2017-03-09 18:32:17 -08:00
|
|
|
lv.avarinitanyall(b, any, all)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Walk forward through the basic block instructions and
|
|
|
|
|
// allocate liveness maps for those instructions that need them.
|
|
|
|
|
// Seed the maps with information about the addrtaken variables.
|
2017-03-09 18:32:17 -08:00
|
|
|
for _, v := range b.Values {
|
|
|
|
|
pos, e := lv.valueEffects(v)
|
|
|
|
|
if e&varkill != 0 {
|
2017-01-14 23:43:26 -08:00
|
|
|
any.Unset(pos)
|
|
|
|
|
all.Unset(pos)
|
|
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
if e&avarinit != 0 {
|
2017-01-14 23:43:26 -08:00
|
|
|
any.Set(pos)
|
|
|
|
|
all.Set(pos)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
if !issafepoint(v) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// Annotate ambiguously live variables so that they can
|
2017-04-19 11:19:53 -07:00
|
|
|
// be zeroed at function entry and at VARKILL points.
|
2017-03-22 11:21:35 -07:00
|
|
|
// liveout is dead here and used as a temporary.
|
2017-03-09 18:32:17 -08:00
|
|
|
liveout.AndNot(any, all)
|
|
|
|
|
if !liveout.IsEmpty() {
|
|
|
|
|
for pos := int32(0); pos < liveout.n; pos++ {
|
|
|
|
|
if !liveout.Get(pos) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
all.Set(pos) // silence future warnings in this block
|
|
|
|
|
n := lv.vars[pos]
|
|
|
|
|
if !n.Name.Needzero() {
|
|
|
|
|
n.Name.SetNeedzero(true)
|
|
|
|
|
if debuglive >= 1 {
|
2017-03-22 20:28:12 -07:00
|
|
|
Warnl(v.Pos, "%v: %L is ambiguously live", lv.fn.Func.Nname, n)
|
2017-03-09 18:32:17 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// Live stuff first.
|
|
|
|
|
live := bvalloc(nvars)
|
|
|
|
|
live.Copy(any)
|
|
|
|
|
lv.livevars = append(lv.livevars, live)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
be.lastbitmapindex = len(lv.livevars) - 1
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
for _, b := range lv.f.Blocks {
|
|
|
|
|
be := lv.blockEffects(b)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// walk backward, emit pcdata and populate the maps
|
2017-03-22 11:21:35 -07:00
|
|
|
index := int32(be.lastbitmapindex)
|
|
|
|
|
if index < 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
// the first block we encounter should have the ATEXT so
|
|
|
|
|
// at no point should pos ever be less than zero.
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("livenessepilogue")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-22 11:21:35 -07:00
|
|
|
liveout.Copy(be.liveout)
|
2017-03-09 18:32:17 -08:00
|
|
|
for i := len(b.Values) - 1; i >= 0; i-- {
|
|
|
|
|
v := b.Values[i]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-22 11:21:35 -07:00
|
|
|
if issafepoint(v) {
|
|
|
|
|
// Found an interesting instruction, record the
|
|
|
|
|
// corresponding liveness information.
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-22 11:21:35 -07:00
|
|
|
live := lv.livevars[index]
|
|
|
|
|
live.Or(live, liveout)
|
|
|
|
|
live.Or(live, livedefer) // only for non-entry safe points
|
|
|
|
|
index--
|
2017-03-09 18:32:17 -08:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-22 11:21:35 -07:00
|
|
|
// Update liveness information.
|
|
|
|
|
pos, e := lv.valueEffects(v)
|
|
|
|
|
if e&varkill != 0 {
|
|
|
|
|
liveout.Unset(pos)
|
|
|
|
|
}
|
|
|
|
|
if e&uevar != 0 {
|
|
|
|
|
liveout.Set(pos)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
if b == lv.f.Entry {
|
2017-03-22 11:21:35 -07:00
|
|
|
if index != 0 {
|
|
|
|
|
Fatalf("bad index for entry point: %v", index)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// Record live variables.
|
2017-03-22 11:21:35 -07:00
|
|
|
live := lv.livevars[index]
|
2017-03-09 18:32:17 -08:00
|
|
|
live.Or(live, liveout)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 12:15:41 -08:00
|
|
|
// Useful sanity check: on entry to the function,
|
|
|
|
|
// the only things that can possibly be live are the
|
|
|
|
|
// input parameters.
|
|
|
|
|
for j, n := range lv.vars {
|
2017-04-25 18:14:12 -07:00
|
|
|
if n.Class() != PPARAM && lv.livevars[0].Get(int32(j)) {
|
2017-03-22 20:28:12 -07:00
|
|
|
Fatalf("internal error: %v %L recorded as live on entry", lv.fn.Func.Nname, n)
|
2017-03-09 12:15:41 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-08 22:02:08 -07:00
|
|
|
func (lv *Liveness) clobber() {
|
|
|
|
|
// The clobberdead experiment inserts code to clobber all the dead variables (locals and args)
|
|
|
|
|
// before and after every safepoint. This experiment is useful for debugging the generation
|
|
|
|
|
// of live pointer bitmaps.
|
|
|
|
|
if objabi.Clobberdead_enabled == 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
var varSize int64
|
|
|
|
|
for _, n := range lv.vars {
|
2017-04-27 15:30:31 -07:00
|
|
|
varSize += n.Type.MustSize()
|
2016-06-08 22:02:08 -07:00
|
|
|
}
|
|
|
|
|
if len(lv.livevars) > 1000 || varSize > 10000 {
|
|
|
|
|
// Be careful to avoid doing too much work.
|
|
|
|
|
// Bail if >1000 safepoints or >10000 bytes of variables.
|
|
|
|
|
// Otherwise, giant functions make this experiment generate too much code.
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if h := os.Getenv("GOCLOBBERDEADHASH"); h != "" {
|
|
|
|
|
// Clobber only functions where the hash of the function name matches a pattern.
|
|
|
|
|
// Useful for binary searching for a miscompiled function.
|
|
|
|
|
hstr := ""
|
2017-04-23 05:10:21 -07:00
|
|
|
for _, b := range sha1.Sum([]byte(lv.fn.funcname())) {
|
2016-06-08 22:02:08 -07:00
|
|
|
hstr += fmt.Sprintf("%08b", b)
|
|
|
|
|
}
|
|
|
|
|
if !strings.HasSuffix(hstr, h) {
|
|
|
|
|
return
|
|
|
|
|
}
|
2017-04-23 05:10:21 -07:00
|
|
|
fmt.Printf("\t\t\tCLOBBERDEAD %s\n", lv.fn.funcname())
|
2016-06-08 22:02:08 -07:00
|
|
|
}
|
|
|
|
|
if lv.f.Name == "forkAndExecInChild" {
|
|
|
|
|
// forkAndExecInChild calls vfork (on linux/amd64, anyway).
|
|
|
|
|
// The code we add here clobbers parts of the stack in the child.
|
|
|
|
|
// When the parent resumes, it is using the same stack frame. But the
|
|
|
|
|
// child has clobbered stack variables that the parent needs. Boom!
|
|
|
|
|
// In particular, the sys argument gets clobbered.
|
|
|
|
|
// Note to self: GOCLOBBERDEADHASH=011100101110
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var oldSched []*ssa.Value
|
|
|
|
|
for _, b := range lv.f.Blocks {
|
|
|
|
|
// Copy block's values to a temporary.
|
|
|
|
|
oldSched = append(oldSched[:0], b.Values...)
|
|
|
|
|
b.Values = b.Values[:0]
|
|
|
|
|
|
|
|
|
|
// Clobber all dead variables at entry.
|
|
|
|
|
if b == lv.f.Entry {
|
|
|
|
|
for len(oldSched) > 0 && len(oldSched[0].Args) == 0 {
|
|
|
|
|
// Skip argless ops. We need to skip at least
|
|
|
|
|
// the lowered ClosurePtr op, because it
|
|
|
|
|
// really wants to be first. This will also
|
|
|
|
|
// skip ops like InitMem and SP, which are ok.
|
|
|
|
|
b.Values = append(b.Values, oldSched[0])
|
|
|
|
|
oldSched = oldSched[1:]
|
|
|
|
|
}
|
|
|
|
|
clobber(lv, b, lv.livevars[0])
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Copy values into schedule, adding clobbering around safepoints.
|
|
|
|
|
for _, v := range oldSched {
|
|
|
|
|
if !issafepoint(v) {
|
|
|
|
|
b.Values = append(b.Values, v)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
before := true
|
|
|
|
|
if v.Op.IsCall() && v.Aux != nil && v.Aux.(*obj.LSym) == typedmemmove {
|
|
|
|
|
// Can't put clobber code before the call to typedmemmove.
|
|
|
|
|
// The variable to-be-copied is marked as dead
|
|
|
|
|
// at the callsite. That is ok, though, as typedmemmove
|
|
|
|
|
// is marked as nosplit, and the first thing it does
|
|
|
|
|
// is to call memmove (also nosplit), after which
|
|
|
|
|
// the source value is dead.
|
|
|
|
|
// See issue 16026.
|
|
|
|
|
before = false
|
|
|
|
|
}
|
|
|
|
|
if before {
|
|
|
|
|
clobber(lv, b, lv.livevars[lv.stackMapIndex[v]])
|
|
|
|
|
}
|
|
|
|
|
b.Values = append(b.Values, v)
|
|
|
|
|
clobber(lv, b, lv.livevars[lv.stackMapIndex[v]])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// clobber generates code to clobber all dead variables (those not marked in live).
|
|
|
|
|
// Clobbering instructions are added to the end of b.Values.
|
|
|
|
|
func clobber(lv *Liveness, b *ssa.Block, live bvec) {
|
|
|
|
|
for i, n := range lv.vars {
|
|
|
|
|
if !live.Get(int32(i)) {
|
|
|
|
|
clobberVar(b, n)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// clobberVar generates code to trash the pointers in v.
|
|
|
|
|
// Clobbering instructions are added to the end of b.Values.
|
|
|
|
|
func clobberVar(b *ssa.Block, v *Node) {
|
|
|
|
|
clobberWalk(b, v, 0, v.Type)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// b = block to which we append instructions
|
|
|
|
|
// v = variable
|
|
|
|
|
// offset = offset of (sub-portion of) variable to clobber (in bytes)
|
|
|
|
|
// t = type of sub-portion of v.
|
|
|
|
|
func clobberWalk(b *ssa.Block, v *Node, offset int64, t *types.Type) {
|
|
|
|
|
if !types.Haspointers(t) {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
switch t.Etype {
|
|
|
|
|
case TPTR32,
|
|
|
|
|
TPTR64,
|
|
|
|
|
TUNSAFEPTR,
|
|
|
|
|
TFUNC,
|
|
|
|
|
TCHAN,
|
|
|
|
|
TMAP:
|
|
|
|
|
clobberPtr(b, v, offset)
|
|
|
|
|
|
|
|
|
|
case TSTRING:
|
|
|
|
|
// struct { byte *str; int len; }
|
|
|
|
|
clobberPtr(b, v, offset)
|
|
|
|
|
|
|
|
|
|
case TINTER:
|
|
|
|
|
// struct { Itab *tab; void *data; }
|
|
|
|
|
// or, when isnilinter(t)==true:
|
|
|
|
|
// struct { Type *type; void *data; }
|
|
|
|
|
clobberPtr(b, v, offset)
|
|
|
|
|
clobberPtr(b, v, offset+int64(Widthptr))
|
|
|
|
|
|
|
|
|
|
case TSLICE:
|
|
|
|
|
// struct { byte *array; int len; int cap; }
|
|
|
|
|
clobberPtr(b, v, offset)
|
|
|
|
|
|
|
|
|
|
case TARRAY:
|
|
|
|
|
for i := int64(0); i < t.NumElem(); i++ {
|
2017-04-27 15:30:31 -07:00
|
|
|
clobberWalk(b, v, offset+i*t.Elem().MustSize(), t.Elem())
|
2016-06-08 22:02:08 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case TSTRUCT:
|
|
|
|
|
for _, t1 := range t.Fields().Slice() {
|
|
|
|
|
clobberWalk(b, v, offset+t1.Offset, t1.Type)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
Fatalf("clobberWalk: unexpected type, %v", t)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// clobberPtr generates a clobber of the pointer at offset offset in v.
|
|
|
|
|
// The clobber instruction is added at the end of b.
|
|
|
|
|
func clobberPtr(b *ssa.Block, v *Node, offset int64) {
|
|
|
|
|
var aux interface{}
|
2017-04-25 18:14:12 -07:00
|
|
|
if v.Class() == PAUTO {
|
2016-06-08 22:02:08 -07:00
|
|
|
aux = &ssa.AutoSymbol{Node: v}
|
|
|
|
|
} else {
|
|
|
|
|
aux = &ssa.ArgSymbol{Node: v}
|
|
|
|
|
}
|
|
|
|
|
b.NewValue0IA(src.NoXPos, ssa.OpClobber, ssa.TypeVoid, offset, aux)
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) {
|
|
|
|
|
if len(b.Preds) == 0 {
|
2017-03-09 12:15:41 -08:00
|
|
|
any.Clear()
|
|
|
|
|
all.Clear()
|
|
|
|
|
for _, pos := range lv.cache.textavarinit {
|
|
|
|
|
any.Set(pos)
|
|
|
|
|
all.Set(pos)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
be := lv.blockEffects(b.Preds[0].Block())
|
|
|
|
|
any.Copy(be.avarinitany)
|
|
|
|
|
all.Copy(be.avarinitall)
|
|
|
|
|
|
|
|
|
|
for _, pred := range b.Preds[1:] {
|
|
|
|
|
be := lv.blockEffects(pred.Block())
|
|
|
|
|
any.Or(any, be.avarinitany)
|
|
|
|
|
all.And(all, be.avarinitall)
|
2017-03-09 12:15:41 -08:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FNV-1 hash function constants (32-bit variant).
const (
	// H0 is the initial hash value (the FNV offset basis).
	H0 = 2166136261
	// Hp is the multiplier applied before each byte is mixed in (the FNV prime).
	Hp = 16777619
)
|
|
|
|
|
|
2016-04-29 14:17:04 +10:00
|
|
|
func hashbitmap(h uint32, bv bvec) uint32 {
|
2015-02-23 16:07:24 -05:00
|
|
|
n := int((bv.n + 31) / 32)
|
|
|
|
|
for i := 0; i < n; i++ {
|
2016-03-13 10:23:18 +09:00
|
|
|
w := bv.b[i]
|
2015-02-13 14:40:36 -05:00
|
|
|
h = (h * Hp) ^ (w & 0xff)
|
|
|
|
|
h = (h * Hp) ^ ((w >> 8) & 0xff)
|
|
|
|
|
h = (h * Hp) ^ ((w >> 16) & 0xff)
|
|
|
|
|
h = (h * Hp) ^ ((w >> 24) & 0xff)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return h
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compact liveness information by coalescing identical per-call-site bitmaps.
|
|
|
|
|
// The merging only happens for a single function, not across the entire binary.
|
|
|
|
|
//
|
|
|
|
|
// There are actually two lists of bitmaps, one list for the local variables and one
|
|
|
|
|
// list for the function arguments. Both lists are indexed by the same PCDATA
|
|
|
|
|
// index, so the corresponding pairs must be considered together when
|
|
|
|
|
// merging duplicates. The argument bitmaps change much less often during
|
|
|
|
|
// function execution than the local variable bitmaps, so it is possible that
|
|
|
|
|
// we could introduce a separate PCDATA index for arguments vs locals and
|
|
|
|
|
// then compact the set of argument bitmaps separately from the set of
|
|
|
|
|
// local variable bitmaps. As of 2014-04-02, doing this to the godoc binary
|
|
|
|
|
// is actually a net loss: we save about 50k of argument bitmaps but the new
|
|
|
|
|
// PCDATA tables cost about 100k. So for now we keep using a single index for
|
|
|
|
|
// both bitmap lists.
|
|
|
|
|
func livenesscompact(lv *Liveness) {
|
|
|
|
|
// Linear probing hash table of bitmaps seen so far.
|
|
|
|
|
// The hash table has 4n entries to keep the linear
|
|
|
|
|
// scan short. An entry of -1 indicates an empty slot.
|
2017-02-24 16:02:31 -08:00
|
|
|
n := len(lv.livevars)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
tablesize := 4 * n
|
|
|
|
|
table := make([]int, tablesize)
|
2015-02-13 14:40:36 -05:00
|
|
|
for i := range table {
|
|
|
|
|
table[i] = -1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// remap[i] = the new index of the old bit vector #i.
|
2015-02-23 16:07:24 -05:00
|
|
|
remap := make([]int, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
for i := range remap {
|
|
|
|
|
remap[i] = -1
|
|
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
uniq := 0 // unique tables found so far
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Consider bit vectors in turn.
|
|
|
|
|
// If new, assign next number using uniq,
|
2017-02-24 16:02:31 -08:00
|
|
|
// record in remap, record in lv.livevars
|
2015-02-13 14:40:36 -05:00
|
|
|
// under the new index, and add entry to hash table.
|
2017-02-24 16:02:31 -08:00
|
|
|
// If already seen, record earlier index in remap.
|
|
|
|
|
Outer:
|
|
|
|
|
for i, live := range lv.livevars {
|
|
|
|
|
h := hashbitmap(H0, live) % uint32(tablesize)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
for {
|
2016-03-13 10:23:18 +09:00
|
|
|
j := table[h]
|
2015-02-13 14:40:36 -05:00
|
|
|
if j < 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
2017-02-24 16:02:31 -08:00
|
|
|
jlive := lv.livevars[j]
|
|
|
|
|
if live.Eq(jlive) {
|
2015-02-13 14:40:36 -05:00
|
|
|
remap[i] = j
|
2017-02-24 16:02:31 -08:00
|
|
|
continue Outer
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h++
|
|
|
|
|
if h == uint32(tablesize) {
|
|
|
|
|
h = 0
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
table[h] = uniq
|
|
|
|
|
remap[i] = uniq
|
2017-02-24 16:02:31 -08:00
|
|
|
lv.livevars[uniq] = live
|
2015-02-13 14:40:36 -05:00
|
|
|
uniq++
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 16:02:31 -08:00
|
|
|
// We've already reordered lv.livevars[0:uniq]. Clear the
|
|
|
|
|
// pointers later in the array so they can be GC'd.
|
|
|
|
|
tail := lv.livevars[uniq:]
|
|
|
|
|
for i := range tail { // memclr loop pattern
|
|
|
|
|
tail[i] = bvec{}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-02-24 16:02:31 -08:00
|
|
|
lv.livevars = lv.livevars[:uniq]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Rewrite PCDATA instructions to use new numbering.
|
2017-03-09 18:32:17 -08:00
|
|
|
lv.showlive(nil, lv.livevars[0])
|
|
|
|
|
pos := 1
|
|
|
|
|
lv.stackMapIndex = make(map[*ssa.Value]int)
|
|
|
|
|
for _, b := range lv.f.Blocks {
|
|
|
|
|
for _, v := range b.Values {
|
|
|
|
|
if issafepoint(v) {
|
|
|
|
|
lv.showlive(v, lv.livevars[remap[pos]])
|
|
|
|
|
lv.stackMapIndex[v] = int(remap[pos])
|
|
|
|
|
pos++
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
|
2017-04-23 05:10:21 -07:00
|
|
|
if debuglive == 0 || lv.fn.funcname() == "init" || strings.HasPrefix(lv.fn.funcname(), ".") {
|
2017-03-09 18:32:17 -08:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if live.IsEmpty() {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-22 20:28:12 -07:00
|
|
|
pos := lv.fn.Func.Nname.Pos
|
2017-03-09 18:32:17 -08:00
|
|
|
if v != nil {
|
|
|
|
|
pos = v.Pos
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s := "live at "
|
|
|
|
|
if v == nil {
|
2017-04-23 05:10:21 -07:00
|
|
|
s += fmt.Sprintf("entry to %s:", lv.fn.funcname())
|
2017-03-09 18:32:17 -08:00
|
|
|
} else if sym, ok := v.Aux.(*obj.LSym); ok {
|
|
|
|
|
fn := sym.Name
|
|
|
|
|
if pos := strings.Index(fn, "."); pos >= 0 {
|
|
|
|
|
fn = fn[pos+1:]
|
|
|
|
|
}
|
|
|
|
|
s += fmt.Sprintf("call to %s:", fn)
|
|
|
|
|
} else {
|
|
|
|
|
s += "indirect call:"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for j, n := range lv.vars {
|
|
|
|
|
if live.Get(int32(j)) {
|
|
|
|
|
s += fmt.Sprintf(" %v", n)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Warnl(pos, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (lv *Liveness) printbvec(printed bool, name string, live bvec) bool {
|
2016-03-13 10:23:18 +09:00
|
|
|
started := false
|
2017-03-09 18:32:17 -08:00
|
|
|
for i, n := range lv.vars {
|
|
|
|
|
if !live.Get(int32(i)) {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
if !started {
|
|
|
|
|
if !printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\t")
|
|
|
|
|
} else {
|
|
|
|
|
fmt.Printf(" ")
|
|
|
|
|
}
|
2016-03-13 10:23:18 +09:00
|
|
|
started = true
|
|
|
|
|
printed = true
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("%s=", name)
|
|
|
|
|
} else {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("%s", n.Sym.Name)
|
|
|
|
|
}
|
|
|
|
|
return printed
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// printeffect is like printbvec, but for a single variable.
|
|
|
|
|
func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool) bool {
|
|
|
|
|
if !x {
|
|
|
|
|
return printed
|
|
|
|
|
}
|
|
|
|
|
if !printed {
|
|
|
|
|
fmt.Printf("\t")
|
|
|
|
|
} else {
|
|
|
|
|
fmt.Printf(" ")
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("%s=%s", name, lv.vars[pos].Sym.Name)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Prints the computed liveness information and inputs, for debugging.
|
|
|
|
|
// This format synthesizes the information used during the multiple passes
|
|
|
|
|
// into a single presentation.
|
|
|
|
|
func livenessprintdebug(lv *Liveness) {
|
2017-04-23 05:10:21 -07:00
|
|
|
fmt.Printf("liveness: %s\n", lv.fn.funcname())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
pcdata := 0
|
2017-03-09 18:32:17 -08:00
|
|
|
for i, b := range lv.f.Blocks {
|
2015-02-13 14:40:36 -05:00
|
|
|
if i > 0 {
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bb#0 pred=1,2 succ=3,4
|
2017-03-09 18:32:17 -08:00
|
|
|
fmt.Printf("bb#%d pred=", b.ID)
|
|
|
|
|
for j, pred := range b.Preds {
|
2015-02-13 14:40:36 -05:00
|
|
|
if j > 0 {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
fmt.Printf("%d", pred.Block().ID)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
fmt.Printf(" succ=")
|
2017-03-09 18:32:17 -08:00
|
|
|
for j, succ := range b.Succs {
|
2015-02-13 14:40:36 -05:00
|
|
|
if j > 0 {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
fmt.Printf("%d", succ.Block().ID)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
be := lv.blockEffects(b)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// initial settings
|
|
|
|
|
printed := false
|
|
|
|
|
printed = lv.printbvec(printed, "uevar", be.uevar)
|
|
|
|
|
printed = lv.printbvec(printed, "livein", be.livein)
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// program listing, with individual effects listed
|
2017-03-09 18:32:17 -08:00
|
|
|
|
|
|
|
|
if b == lv.f.Entry {
|
|
|
|
|
live := lv.livevars[pcdata]
|
2017-03-22 20:28:12 -07:00
|
|
|
fmt.Printf("(%s) function entry\n", linestr(lv.fn.Func.Nname.Pos))
|
2017-03-09 18:32:17 -08:00
|
|
|
fmt.Printf("\tlive=")
|
|
|
|
|
printed = false
|
|
|
|
|
for j, n := range lv.vars {
|
|
|
|
|
if !live.Get(int32(j)) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if printed {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("%v", n)
|
|
|
|
|
printed = true
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, v := range b.Values {
|
|
|
|
|
fmt.Printf("(%s) %v\n", linestr(v.Pos), v.LongString())
|
|
|
|
|
|
|
|
|
|
if pos, ok := lv.stackMapIndex[v]; ok {
|
|
|
|
|
pcdata = pos
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
|
|
|
|
|
pos, effect := lv.valueEffects(v)
|
2016-03-13 10:23:18 +09:00
|
|
|
printed = false
|
2017-03-09 18:32:17 -08:00
|
|
|
printed = lv.printeffect(printed, "uevar", pos, effect&uevar != 0)
|
|
|
|
|
printed = lv.printeffect(printed, "varkill", pos, effect&varkill != 0)
|
|
|
|
|
printed = lv.printeffect(printed, "avarinit", pos, effect&avarinit != 0)
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
|
|
|
|
|
if !issafepoint(v) {
|
|
|
|
|
continue
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
live := lv.livevars[pcdata]
|
|
|
|
|
fmt.Printf("\tlive=")
|
|
|
|
|
printed = false
|
|
|
|
|
for j, n := range lv.vars {
|
|
|
|
|
if !live.Get(int32(j)) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if printed {
|
|
|
|
|
fmt.Printf(",")
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("%v", n)
|
|
|
|
|
printed = true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
fmt.Printf("\n")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bb bitsets
|
|
|
|
|
fmt.Printf("end\n")
|
2017-03-09 18:32:17 -08:00
|
|
|
printed = false
|
|
|
|
|
printed = lv.printbvec(printed, "varkill", be.varkill)
|
|
|
|
|
printed = lv.printbvec(printed, "liveout", be.liveout)
|
|
|
|
|
printed = lv.printbvec(printed, "avarinit", be.avarinit)
|
|
|
|
|
printed = lv.printbvec(printed, "avarinitany", be.avarinitany)
|
|
|
|
|
printed = lv.printbvec(printed, "avarinitall", be.avarinitall)
|
2016-03-13 10:23:18 +09:00
|
|
|
if printed {
|
2015-02-13 14:40:36 -05:00
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\n")
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 16:02:31 -08:00
|
|
|
// Dumps a slice of bitmaps to a symbol as a sequence of uint32 values. The
|
|
|
|
|
// first word dumped is the total number of bitmaps. The second word is the
|
|
|
|
|
// length of the bitmaps. All bitmaps are assumed to be of equal length. The
|
|
|
|
|
// remaining bytes are the raw bitmaps.
|
2017-04-14 06:35:53 -07:00
|
|
|
func livenessemit(lv *Liveness, argssym, livesym *obj.LSym) {
|
2017-03-21 12:13:12 -07:00
|
|
|
args := bvalloc(argswords(lv))
|
2017-04-21 13:59:51 -07:00
|
|
|
aoff := duint32(argssym, 0, uint32(len(lv.livevars))) // number of bitmaps
|
|
|
|
|
aoff = duint32(argssym, aoff, uint32(args.n)) // number of bits in each bitmap
|
2017-02-24 16:02:31 -08:00
|
|
|
|
2017-03-17 09:19:56 -07:00
|
|
|
locals := bvalloc(localswords(lv))
|
2017-04-21 13:59:51 -07:00
|
|
|
loff := duint32(livesym, 0, uint32(len(lv.livevars))) // number of bitmaps
|
|
|
|
|
loff = duint32(livesym, loff, uint32(locals.n)) // number of bits in each bitmap
|
2017-02-24 16:02:31 -08:00
|
|
|
|
|
|
|
|
for _, live := range lv.livevars {
|
|
|
|
|
args.Clear()
|
|
|
|
|
locals.Clear()
|
|
|
|
|
|
|
|
|
|
onebitlivepointermap(lv, live, lv.vars, args, locals)
|
|
|
|
|
|
2017-04-21 13:59:51 -07:00
|
|
|
aoff = dbvec(argssym, aoff, args)
|
|
|
|
|
loff = dbvec(livesym, loff, locals)
|
2016-03-28 22:27:36 +13:00
|
|
|
}
|
2017-02-24 16:02:31 -08:00
|
|
|
|
2017-04-14 06:35:53 -07:00
|
|
|
// Give these LSyms content-addressable names,
|
|
|
|
|
// so that they can be de-duplicated.
|
|
|
|
|
// This provides significant binary size savings.
|
|
|
|
|
// It is safe to rename these LSyms because
|
2017-04-14 06:44:30 -07:00
|
|
|
// they are tracked separately from ctxt.hash.
|
2017-04-14 06:35:53 -07:00
|
|
|
argssym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(argssym.P))
|
|
|
|
|
livesym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(livesym.P))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 18:32:17 -08:00
|
|
|
// Entry pointer for liveness analysis. Solves for the liveness of
|
|
|
|
|
// pointer variables in the function and emits a runtime data
|
2015-02-13 14:40:36 -05:00
|
|
|
// structure read by the garbage collector.
|
2017-03-09 18:32:17 -08:00
|
|
|
// Returns a map from GC safe points to their corresponding stack map index.
|
2017-04-14 06:35:53 -07:00
|
|
|
func liveness(e *ssafn, f *ssa.Func) map[*ssa.Value]int {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Construct the global liveness state.
|
2017-03-17 09:19:56 -07:00
|
|
|
vars := getvariables(e.curfn)
|
2017-03-09 18:32:17 -08:00
|
|
|
lv := newliveness(e.curfn, f, vars, e.stkptrsize)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Run the dataflow framework.
|
|
|
|
|
livenessprologue(lv)
|
|
|
|
|
livenesssolve(lv)
|
|
|
|
|
livenessepilogue(lv)
|
|
|
|
|
livenesscompact(lv)
|
2016-06-08 22:02:08 -07:00
|
|
|
lv.clobber()
|
2015-02-13 14:40:36 -05:00
|
|
|
if debuglive >= 2 {
|
|
|
|
|
livenessprintdebug(lv)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Emit the live pointer map data structures
|
2017-04-14 06:35:53 -07:00
|
|
|
if ls := e.curfn.Func.lsym; ls != nil {
|
2017-04-18 10:18:34 -07:00
|
|
|
livenessemit(lv, &ls.Func.GCArgs, &ls.Func.GCLocals)
|
2017-04-14 06:35:53 -07:00
|
|
|
}
|
2017-03-09 18:32:17 -08:00
|
|
|
return lv.stackMapIndex
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|