go/src/cmd/compile/internal/gc/plive.go

1130 lines
29 KiB
Go
Raw Normal View History

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector liveness bitmap generation.
// The command line flag -live causes this code to print debug information.
// The levels are:
//
// -live (aka -live=1): print liveness lists as code warnings at safe points
// -live=2: print an assembly listing with liveness annotations
//
// Each level includes the earlier output as well.
package gc
import (
"cmd/compile/internal/ssa"
"cmd/internal/obj"
"crypto/md5"
"fmt"
"strings"
)
// BlockEffects summarizes the liveness effects on an SSA block.
type BlockEffects struct {
lastbitmapindex int // for livenessepilogue
// Computed during livenessprologue using only the content of
// individual blocks:
//
// uevar: upward exposed variables (used before set in block)
// varkill: killed variables (set in block)
// avarinit: addrtaken variables set or used (proof of initialization)
uevar bvec
varkill bvec
avarinit bvec
// Computed during livenesssolve using control flow information:
//
// livein: variables live at block entry
// liveout: variables live at block exit
// avarinitany: addrtaken variables possibly initialized at block exit
// (initialized in block or at exit from any predecessor block)
// avarinitall: addrtaken variables certainly initialized at block exit
// (initialized in block or at exit from all predecessor blocks)
livein bvec
liveout bvec
avarinitany bvec
avarinitall bvec
}
// A collection of global state used by liveness analysis.
type Liveness struct {
fn *Node
f *ssa.Func
vars []*Node
stkptrsize int64
be []BlockEffects
// stackMapIndex maps from safe points (i.e., CALLs) to their
// index within the stack maps.
stackMapIndex map[*ssa.Value]int
// An array with a bit vector for each safe point tracking
// live variables, indexed by bb.rpo.
livevars []bvec
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
cache progeffectscache
}
type progeffectscache struct {
textavarinit []int32
retuevar []int32
tailuevar []int32
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
uevar [3]int32
varkill [3]int32
avarinit [3]int32
initialized bool
}
// livenessShouldTrack reports whether the liveness analysis
// should track the variable n.
// We don't care about variables that have no pointers,
// nor do we care about non-local variables,
// nor do we care about empty structs (handled by the pointer check),
// nor do we care about the fake PAUTOHEAP variables.
func livenessShouldTrack(n *Node) bool {
return n.Op == ONAME && (n.Class == PAUTO || n.Class == PPARAM || n.Class == PPARAMOUT) && haspointers(n.Type)
}
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
// getvariables returns the list of on-stack variables that we need to track.
func getvariables(fn *Node) []*Node {
var vars []*Node
for _, n := range fn.Func.Dcl {
if n.Op == ONAME {
// The Node.opt field is available for use by optimization passes.
// We use it to hold the index of the node in the variables array
// (nil means the Node is not in the variables array).
// The Node.curfn field is supposed to be set to the current function
// already, but for some compiler-introduced names it seems not to be,
// so fix that here.
// Later, when we want to find the index of a node in the variables list,
// we will check that n.Curfn == Curfn and n.Opt() != nil. Then n.Opt().(int32)
// is the index in the variables list.
n.SetOpt(nil)
n.Name.Curfn = Curfn
}
if livenessShouldTrack(n) {
n.SetOpt(int32(len(vars)))
vars = append(vars, n)
}
}
return vars
}
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
func (lv *Liveness) initcache() {
if lv.cache.initialized {
Fatalf("liveness cache initialized twice")
return
}
lv.cache.initialized = true
for i, node := range lv.vars {
switch node.Class {
case PPARAM:
// A return instruction with a p.to is a tail return, which brings
// the stack pointer back up (if it ever went down) and then jumps
// to a new function entirely. That form of instruction must read
// all the parameters for correctness, and similarly it must not
// read the out arguments - they won't be set until the new
// function runs.
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
lv.cache.tailuevar = append(lv.cache.tailuevar, int32(i))
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if node.Addrtaken() {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
lv.cache.textavarinit = append(lv.cache.textavarinit, int32(i))
}
case PPARAMOUT:
// If the result had its address taken, it is being tracked
// by the avarinit code, which does not use uevar.
// If we added it to uevar too, we'd not see any kill
// and decide that the variable was live entry, which it is not.
// So only use uevar in the non-addrtaken case.
// The p.to.type == obj.TYPE_NONE limits the bvset to
// non-tail-call return instructions; see note below for details.
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if !node.Addrtaken() {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
lv.cache.retuevar = append(lv.cache.retuevar, int32(i))
}
}
}
}
// A liveEffect is a set of flags that describe an instruction's
// liveness effects on a variable.
//
// The possible flags are:
// uevar - used by the instruction
// varkill - killed by the instruction
// for variables without address taken, means variable was set
// for variables with address taken, means variable was marked dead
// avarinit - initialized or referred to by the instruction,
// only for variables with address taken but not escaping to heap
//
// The avarinit output serves as a signal that the data has been
// initialized, because any use of a variable must come after its
// initialization.
type liveEffect int
const (
uevar liveEffect = 1 << iota
varkill
avarinit
)
// valueEffects returns the index of a variable in lv.vars and the
// liveness effects v has on that variable.
// If v does not affect any tracked variables, it returns -1, 0.
func (lv *Liveness) valueEffects(v *ssa.Value) (pos int32, effect liveEffect) {
n, e := affectedNode(v)
if e == 0 {
return -1, 0
}
pos = liveIndex(n, lv.vars)
if pos < 0 {
return -1, 0
}
if n.Addrtaken() {
if v.Op != ssa.OpVarKill {
effect |= avarinit
}
if v.Op == ssa.OpVarDef || v.Op == ssa.OpVarKill {
effect |= varkill
}
} else {
// Read is a read, obviously.
// Addr by itself is also implicitly a read.
//
// Addr|Write means that the address is being taken
// but only so that the instruction can write to the value.
// It is not a read.
if e&ssa.SymRead != 0 || e&(ssa.SymAddr|ssa.SymWrite) == ssa.SymAddr {
effect |= uevar
}
if e&ssa.SymWrite != 0 && (!isfat(n.Type) || v.Op == ssa.OpVarDef) {
effect |= varkill
}
}
return
}
// affectedNode returns the *Node affected by v
func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
// Special cases.
switch v.Op {
case ssa.OpLoadReg:
n, _ := AutoVar(v.Args[0])
return n, ssa.SymRead
case ssa.OpStoreReg:
n, _ := AutoVar(v)
return n, ssa.SymWrite
case ssa.OpVarLive:
return v.Aux.(*Node), ssa.SymRead
case ssa.OpVarDef, ssa.OpVarKill:
return v.Aux.(*Node), ssa.SymWrite
case ssa.OpKeepAlive:
n, _ := AutoVar(v.Args[0])
return n, ssa.SymRead
}
e := v.Op.SymEffect()
if e == 0 {
return nil, 0
}
var n *Node
switch a := v.Aux.(type) {
case nil, *ssa.ExternSymbol:
// ok, but no node
case *ssa.ArgSymbol:
n = a.Node.(*Node)
case *ssa.AutoSymbol:
n = a.Node.(*Node)
default:
Fatalf("weird aux: %s", v.LongString())
}
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
return n, e
}
// liveIndex returns the index of n in the set of tracked vars.
// If n is not a tracked var, liveIndex returns -1.
// If n is not a tracked var but should be tracked, liveIndex crashes.
func liveIndex(n *Node, vars []*Node) int32 {
if n == nil || n.Name.Curfn != Curfn || !livenessShouldTrack(n) {
return -1
}
pos, ok := n.Opt().(int32) // index in vars
if !ok {
Fatalf("lost track of variable in liveness: %v (%p, %p)", n, n, n.Orig)
}
if pos >= int32(len(vars)) || vars[pos] != n {
Fatalf("bad bookkeeping in liveness: %v (%p, %p)", n, n, n.Orig)
}
return pos
}
// Constructs a new liveness structure used to hold the global state of the
// liveness computation. The cfg argument is a slice of *BasicBlocks and the
// vars argument is a slice of *Nodes.
func newliveness(fn *Node, f *ssa.Func, vars []*Node, stkptrsize int64) *Liveness {
lv := &Liveness{
fn: fn,
f: f,
vars: vars,
stkptrsize: stkptrsize,
be: make([]BlockEffects, f.NumBlocks()),
}
nblocks := int32(len(f.Blocks))
nvars := int32(len(vars))
bulk := bvbulkalloc(nvars, nblocks*7)
for _, b := range f.Blocks {
be := lv.blockEffects(b)
be.uevar = bulk.next()
be.varkill = bulk.next()
be.livein = bulk.next()
be.liveout = bulk.next()
be.avarinit = bulk.next()
be.avarinitany = bulk.next()
be.avarinitall = bulk.next()
}
return lv
}
func (lv *Liveness) blockEffects(b *ssa.Block) *BlockEffects {
return &lv.be[b.ID]
}
// NOTE: The bitmap for a specific type t should be cached in t after the first run
// and then simply copied into bv at the correct offset on future calls with
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, onebitwalktype1
// accounts for 40% of the 6g execution time.
func onebitwalktype1(t *Type, xoffset *int64, bv bvec) {
if t.Align > 0 && *xoffset&int64(t.Align-1) != 0 {
Fatalf("onebitwalktype1: invalid initial alignment, %v", t)
}
switch t.Etype {
case TINT8,
TUINT8,
TINT16,
TUINT16,
TINT32,
TUINT32,
TINT64,
TUINT64,
TINT,
TUINT,
TUINTPTR,
TBOOL,
TFLOAT32,
TFLOAT64,
TCOMPLEX64,
TCOMPLEX128:
*xoffset += t.Width
case TPTR32,
TPTR64,
TUNSAFEPTR,
TFUNC,
TCHAN,
TMAP:
if *xoffset&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid alignment, %v", t)
}
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer
*xoffset += t.Width
case TSTRING:
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// struct { byte *str; intgo len; }
if *xoffset&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid alignment, %v", t)
}
bv.Set(int32(*xoffset / int64(Widthptr))) //pointer in first slot
*xoffset += t.Width
case TINTER:
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// struct { Itab *tab; void *data; }
// or, when isnilinter(t)==true:
// struct { Type *type; void *data; }
if *xoffset&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid alignment, %v", t)
}
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer in first slot
bv.Set(int32(*xoffset/int64(Widthptr) + 1)) // pointer in second slot
*xoffset += t.Width
case TSLICE:
// struct { byte *array; uintgo len; uintgo cap; }
if *xoffset&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid TARRAY alignment, %v", t)
}
bv.Set(int32(*xoffset / int64(Widthptr))) // pointer in first slot (BitsPointer)
*xoffset += t.Width
case TARRAY:
for i := int64(0); i < t.NumElem(); i++ {
onebitwalktype1(t.Elem(), xoffset, bv)
}
case TSTRUCT:
var o int64
for _, t1 := range t.Fields().Slice() {
fieldoffset := t1.Offset
*xoffset += fieldoffset - o
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
onebitwalktype1(t1.Type, xoffset, bv)
o = fieldoffset + t1.Type.Width
}
*xoffset += t.Width - o
default:
Fatalf("onebitwalktype1: unexpected type, %v", t)
}
}
// Returns the number of words of local variables.
func localswords(lv *Liveness) int32 {
return int32(lv.stkptrsize / int64(Widthptr))
}
// Returns the number of words of in and out arguments.
func argswords() int32 {
return int32(Curfn.Type.ArgWidth() / int64(Widthptr))
}
// Generates live pointer value maps for arguments and local variables. The
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
func onebitlivepointermap(lv *Liveness, liveout bvec, vars []*Node, args bvec, locals bvec) {
var xoffset int64
for i := int32(0); ; i++ {
i = liveout.Next(i)
if i < 0 {
break
}
node := vars[i]
switch node.Class {
case PAUTO:
xoffset = node.Xoffset + lv.stkptrsize
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
onebitwalktype1(node.Type, &xoffset, locals)
case PPARAM, PPARAMOUT:
xoffset = node.Xoffset
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
onebitwalktype1(node.Type, &xoffset, args)
}
}
}
// Returns true for instructions that are safe points that must be annotated
// with liveness information.
func issafepoint(v *ssa.Value) bool {
return v.Op.IsCall() || v.Op == ssa.OpARMCALLudiv
}
// Initializes the sets for solving the live variables. Visits all the
// instructions in each basic block to summarizes the information at each basic
// block
func livenessprologue(lv *Liveness) {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
lv.initcache()
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
// Walk the block instructions backward and update the block
// effects with the each prog effects.
for j := len(b.Values) - 1; j >= 0; j-- {
pos, e := lv.valueEffects(b.Values[j])
if e&varkill != 0 {
be.varkill.Set(pos)
be.uevar.Unset(pos)
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
}
if e&uevar != 0 {
be.uevar.Set(pos)
}
}
// Walk the block instructions forward to update avarinit bits.
// avarinit describes the effect at the end of the block, not the beginning.
for j := 0; j < len(b.Values); j++ {
pos, e := lv.valueEffects(b.Values[j])
if e&varkill != 0 {
be.avarinit.Unset(pos)
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
}
if e&avarinit != 0 {
be.avarinit.Set(pos)
}
}
}
}
// Solve the liveness dataflow equations.
func livenesssolve(lv *Liveness) {
// These temporary bitvectors exist to avoid successive allocations and
// frees within the loop.
newlivein := bvalloc(int32(len(lv.vars)))
newliveout := bvalloc(int32(len(lv.vars)))
any := bvalloc(int32(len(lv.vars)))
all := bvalloc(int32(len(lv.vars)))
// Push avarinitall, avarinitany forward.
// avarinitall says the addressed var is initialized along all paths reaching the block exit.
// avarinitany says the addressed var is initialized along some path reaching the block exit.
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
if b == lv.f.Entry {
be.avarinitall.Copy(be.avarinit)
} else {
be.avarinitall.Clear()
be.avarinitall.Not()
}
be.avarinitany.Copy(be.avarinit)
}
// Walk blocks in the general direction of propagation (RPO
// for avarinit{any,all}, and PO for live{in,out}). This
// improves convergence.
po := lv.f.Postorder()
for change := true; change; {
change = false
for i := len(po) - 1; i >= 0; i-- {
b := po[i]
be := lv.blockEffects(b)
lv.avarinitanyall(b, any, all)
any.AndNot(any, be.varkill)
all.AndNot(all, be.varkill)
any.Or(any, be.avarinit)
all.Or(all, be.avarinit)
if !any.Eq(be.avarinitany) {
change = true
be.avarinitany.Copy(any)
}
if !all.Eq(be.avarinitall) {
change = true
be.avarinitall.Copy(all)
}
}
}
// Iterate through the blocks in reverse round-robin fashion. A work
// queue might be slightly faster. As is, the number of iterations is
// so low that it hardly seems to be worth the complexity.
for change := true; change; {
change = false
for _, b := range po {
be := lv.blockEffects(b)
newliveout.Clear()
switch b.Kind {
case ssa.BlockRet:
for _, pos := range lv.cache.retuevar {
newliveout.Set(pos)
}
case ssa.BlockRetJmp:
for _, pos := range lv.cache.tailuevar {
newliveout.Set(pos)
}
case ssa.BlockExit:
// nothing to do
default:
// A variable is live on output from this block
// if it is live on input to some successor.
//
// out[b] = \bigcup_{s \in succ[b]} in[s]
newliveout.Copy(lv.blockEffects(b.Succs[0].Block()).livein)
for _, succ := range b.Succs[1:] {
newliveout.Or(newliveout, lv.blockEffects(succ.Block()).livein)
}
}
if !be.liveout.Eq(newliveout) {
change = true
be.liveout.Copy(newliveout)
}
// A variable is live on input to this block
// if it is live on output from this block and
// not set by the code in this block.
//
// in[b] = uevar[b] \cup (out[b] \setminus varkill[b])
newlivein.AndNot(be.liveout, be.varkill)
be.livein.Or(newlivein, be.uevar)
}
}
}
// Visits all instructions in a basic block and computes a bit vector of live
// variables at each safe point locations.
func livenessepilogue(lv *Liveness) {
nvars := int32(len(lv.vars))
livein := bvalloc(nvars)
liveout := bvalloc(nvars)
any := bvalloc(nvars)
all := bvalloc(nvars)
livedefer := bvalloc(nvars) // always-live variables
// If there is a defer (that could recover), then all output
// parameters are live all the time. In addition, any locals
// that are pointers to heap-allocated output parameters are
// also always live (post-deferreturn code needs these
// pointers to copy values back to the stack).
// TODO: if the output parameter is heap-allocated, then we
// don't need to keep the stack copy live?
if lv.fn.Func.HasDefer() {
for i, n := range lv.vars {
if n.Class == PPARAMOUT {
if n.IsOutputParamHeapAddr() {
// Just to be paranoid. Heap addresses are PAUTOs.
Fatalf("variable %v both output param and heap output param", n)
}
if n.Name.Param.Heapaddr != nil {
// If this variable moved to the heap, then
// its stack copy is not live.
continue
}
// Note: zeroing is handled by zeroResults in walk.go.
livedefer.Set(int32(i))
}
if n.IsOutputParamHeapAddr() {
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.Name.SetNeedzero(true)
livedefer.Set(int32(i))
}
}
}
{
// Reserve an entry for function entry.
live := bvalloc(nvars)
for _, pos := range lv.cache.textavarinit {
live.Set(pos)
}
lv.livevars = append(lv.livevars, live)
}
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
// Compute avarinitany and avarinitall for entry to block.
// This duplicates information known during livenesssolve
// but avoids storing two more vectors for each block.
lv.avarinitanyall(b, any, all)
// Walk forward through the basic block instructions and
// allocate liveness maps for those instructions that need them.
// Seed the maps with information about the addrtaken variables.
for _, v := range b.Values {
pos, e := lv.valueEffects(v)
if e&varkill != 0 {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
any.Unset(pos)
all.Unset(pos)
}
if e&avarinit != 0 {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
any.Set(pos)
all.Set(pos)
}
if !issafepoint(v) {
continue
}
// Annotate ambiguously live variables so that they can
// be zeroed at function entry.
// livein and liveout are dead here and used as temporaries.
livein.Clear()
liveout.AndNot(any, all)
if !liveout.IsEmpty() {
for pos := int32(0); pos < liveout.n; pos++ {
if !liveout.Get(pos) {
continue
}
all.Set(pos) // silence future warnings in this block
n := lv.vars[pos]
if !n.Name.Needzero() {
n.Name.SetNeedzero(true)
if debuglive >= 1 {
Warnl(v.Pos, "%v: %L is ambiguously live", Curfn.Func.Nname, n)
}
}
}
}
// Live stuff first.
live := bvalloc(nvars)
live.Copy(any)
lv.livevars = append(lv.livevars, live)
}
be.lastbitmapindex = len(lv.livevars) - 1
}
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
// walk backward, emit pcdata and populate the maps
pos := int32(be.lastbitmapindex)
if pos < 0 {
// the first block we encounter should have the ATEXT so
// at no point should pos ever be less than zero.
Fatalf("livenessepilogue")
}
livein.Copy(be.liveout)
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
// Propagate liveness information
{
pos, e := lv.valueEffects(v)
liveout.Copy(livein)
if e&varkill != 0 {
livein.Unset(pos)
}
if e&uevar != 0 {
livein.Set(pos)
}
}
if !issafepoint(v) {
continue
}
// Found an interesting instruction, record the
// corresponding liveness information.
// Record live variables.
live := lv.livevars[pos]
live.Or(live, liveout)
live.Or(live, livedefer) // only for non-entry safe points
pos--
}
if b == lv.f.Entry {
if pos != 0 {
Fatalf("bad pos for entry point: %v", pos)
}
// Record live variables.
live := lv.livevars[pos]
live.Or(live, liveout)
}
}
// Useful sanity check: on entry to the function,
// the only things that can possibly be live are the
// input parameters.
for j, n := range lv.vars {
if n.Class != PPARAM && lv.livevars[0].Get(int32(j)) {
Fatalf("internal error: %v %L recorded as live on entry", Curfn.Func.Nname, n)
}
}
}
func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) {
if len(b.Preds) == 0 {
any.Clear()
all.Clear()
for _, pos := range lv.cache.textavarinit {
any.Set(pos)
all.Set(pos)
}
return
}
be := lv.blockEffects(b.Preds[0].Block())
any.Copy(be.avarinitany)
all.Copy(be.avarinitall)
for _, pred := range b.Preds[1:] {
be := lv.blockEffects(pred.Block())
any.Or(any, be.avarinitany)
all.And(all, be.avarinitall)
}
}
// FNV-1 hash function constants.
const (
H0 = 2166136261
Hp = 16777619
)
func hashbitmap(h uint32, bv bvec) uint32 {
n := int((bv.n + 31) / 32)
for i := 0; i < n; i++ {
w := bv.b[i]
h = (h * Hp) ^ (w & 0xff)
h = (h * Hp) ^ ((w >> 8) & 0xff)
h = (h * Hp) ^ ((w >> 16) & 0xff)
h = (h * Hp) ^ ((w >> 24) & 0xff)
}
return h
}
// Compact liveness information by coalescing identical per-call-site bitmaps.
// The merging only happens for a single function, not across the entire binary.
//
// There are actually two lists of bitmaps, one list for the local variables and one
// list for the function arguments. Both lists are indexed by the same PCDATA
// index, so the corresponding pairs must be considered together when
// merging duplicates. The argument bitmaps change much less often during
// function execution than the local variable bitmaps, so it is possible that
// we could introduce a separate PCDATA index for arguments vs locals and
// then compact the set of argument bitmaps separately from the set of
// local variable bitmaps. As of 2014-04-02, doing this to the godoc binary
// is actually a net loss: we save about 50k of argument bitmaps but the new
// PCDATA tables cost about 100k. So for now we keep using a single index for
// both bitmap lists.
func livenesscompact(lv *Liveness) {
// Linear probing hash table of bitmaps seen so far.
// The hash table has 4n entries to keep the linear
// scan short. An entry of -1 indicates an empty slot.
n := len(lv.livevars)
tablesize := 4 * n
table := make([]int, tablesize)
for i := range table {
table[i] = -1
}
// remap[i] = the new index of the old bit vector #i.
remap := make([]int, n)
for i := range remap {
remap[i] = -1
}
uniq := 0 // unique tables found so far
// Consider bit vectors in turn.
// If new, assign next number using uniq,
// record in remap, record in lv.livevars
// under the new index, and add entry to hash table.
// If already seen, record earlier index in remap.
Outer:
for i, live := range lv.livevars {
h := hashbitmap(H0, live) % uint32(tablesize)
for {
j := table[h]
if j < 0 {
break
}
jlive := lv.livevars[j]
if live.Eq(jlive) {
remap[i] = j
continue Outer
}
h++
if h == uint32(tablesize) {
h = 0
}
}
table[h] = uniq
remap[i] = uniq
lv.livevars[uniq] = live
uniq++
}
// We've already reordered lv.livevars[0:uniq]. Clear the
// pointers later in the array so they can be GC'd.
tail := lv.livevars[uniq:]
for i := range tail { // memclr loop pattern
tail[i] = bvec{}
}
lv.livevars = lv.livevars[:uniq]
// Rewrite PCDATA instructions to use new numbering.
lv.showlive(nil, lv.livevars[0])
pos := 1
lv.stackMapIndex = make(map[*ssa.Value]int)
for _, b := range lv.f.Blocks {
for _, v := range b.Values {
if issafepoint(v) {
lv.showlive(v, lv.livevars[remap[pos]])
lv.stackMapIndex[v] = int(remap[pos])
pos++
}
}
}
}
func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
if debuglive == 0 || Curfn.Func.Nname.Sym.Name == "init" || strings.HasPrefix(Curfn.Func.Nname.Sym.Name, ".") {
return
}
if live.IsEmpty() {
return
}
pos := Curfn.Func.Nname.Pos
if v != nil {
pos = v.Pos
}
s := "live at "
if v == nil {
s += fmt.Sprintf("entry to %s:", Curfn.Func.Nname.Sym.Name)
} else if sym, ok := v.Aux.(*obj.LSym); ok {
fn := sym.Name
if pos := strings.Index(fn, "."); pos >= 0 {
fn = fn[pos+1:]
}
s += fmt.Sprintf("call to %s:", fn)
} else {
s += "indirect call:"
}
for j, n := range lv.vars {
if live.Get(int32(j)) {
s += fmt.Sprintf(" %v", n)
}
}
Warnl(pos, s)
}
func (lv *Liveness) printbvec(printed bool, name string, live bvec) bool {
started := false
for i, n := range lv.vars {
if !live.Get(int32(i)) {
continue
}
if !started {
if !printed {
fmt.Printf("\t")
} else {
fmt.Printf(" ")
}
started = true
printed = true
fmt.Printf("%s=", name)
} else {
fmt.Printf(",")
}
fmt.Printf("%s", n.Sym.Name)
}
return printed
}
// printeffect is like printbvec, but for a single variable.
func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool) bool {
if !x {
return printed
}
if !printed {
fmt.Printf("\t")
} else {
fmt.Printf(" ")
}
fmt.Printf("%s=%s", name, lv.vars[pos].Sym.Name)
return true
}
// Prints the computed liveness information and inputs, for debugging.
// This format synthesizes the information used during the multiple passes
// into a single presentation.
func livenessprintdebug(lv *Liveness) {
fmt.Printf("liveness: %s\n", Curfn.Func.Nname.Sym.Name)
pcdata := 0
for i, b := range lv.f.Blocks {
if i > 0 {
fmt.Printf("\n")
}
// bb#0 pred=1,2 succ=3,4
fmt.Printf("bb#%d pred=", b.ID)
for j, pred := range b.Preds {
if j > 0 {
fmt.Printf(",")
}
fmt.Printf("%d", pred.Block().ID)
}
fmt.Printf(" succ=")
for j, succ := range b.Succs {
if j > 0 {
fmt.Printf(",")
}
fmt.Printf("%d", succ.Block().ID)
}
fmt.Printf("\n")
be := lv.blockEffects(b)
// initial settings
printed := false
printed = lv.printbvec(printed, "uevar", be.uevar)
printed = lv.printbvec(printed, "livein", be.livein)
if printed {
fmt.Printf("\n")
}
// program listing, with individual effects listed
if b == lv.f.Entry {
live := lv.livevars[pcdata]
fmt.Printf("(%s) function entry\n", linestr(Curfn.Func.Nname.Pos))
fmt.Printf("\tlive=")
printed = false
for j, n := range lv.vars {
if !live.Get(int32(j)) {
continue
}
if printed {
fmt.Printf(",")
}
fmt.Printf("%v", n)
printed = true
}
fmt.Printf("\n")
}
for _, v := range b.Values {
fmt.Printf("(%s) %v\n", linestr(v.Pos), v.LongString())
if pos, ok := lv.stackMapIndex[v]; ok {
pcdata = pos
}
pos, effect := lv.valueEffects(v)
printed = false
printed = lv.printeffect(printed, "uevar", pos, effect&uevar != 0)
printed = lv.printeffect(printed, "varkill", pos, effect&varkill != 0)
printed = lv.printeffect(printed, "avarinit", pos, effect&avarinit != 0)
if printed {
fmt.Printf("\n")
}
if !issafepoint(v) {
continue
}
live := lv.livevars[pcdata]
fmt.Printf("\tlive=")
printed = false
for j, n := range lv.vars {
if !live.Get(int32(j)) {
continue
}
if printed {
fmt.Printf(",")
}
fmt.Printf("%v", n)
printed = true
}
fmt.Printf("\n")
}
// bb bitsets
fmt.Printf("end\n")
printed = false
printed = lv.printbvec(printed, "varkill", be.varkill)
printed = lv.printbvec(printed, "liveout", be.liveout)
printed = lv.printbvec(printed, "avarinit", be.avarinit)
printed = lv.printbvec(printed, "avarinitany", be.avarinitany)
printed = lv.printbvec(printed, "avarinitall", be.avarinitall)
if printed {
fmt.Printf("\n")
}
}
fmt.Printf("\n")
}
func finishgclocals(sym *Sym) {
ls := Linksym(sym)
ls.Name = fmt.Sprintf("gclocals·%x", md5.Sum(ls.P))
ls.Set(obj.AttrDuplicateOK, true)
sv := obj.SymVer{Name: ls.Name, Version: 0}
ls2, ok := Ctxt.Hash[sv]
if ok {
sym.Lsym = ls2
} else {
Ctxt.Hash[sv] = ls
ggloblsym(sym, int32(ls.Size), obj.RODATA)
}
}
// Dumps a slice of bitmaps to a symbol as a sequence of uint32 values. The
// first word dumped is the total number of bitmaps. The second word is the
// length of the bitmaps. All bitmaps are assumed to be of equal length. The
// remaining bytes are the raw bitmaps.
func livenessemit(lv *Liveness, argssym, livesym *Sym) {
args := bvalloc(argswords())
aoff := duint32(argssym, 0, uint32(len(lv.livevars))) // number of bitmaps
aoff = duint32(argssym, aoff, uint32(args.n)) // number of bits in each bitmap
locals := bvalloc(localswords(lv))
loff := duint32(livesym, 0, uint32(len(lv.livevars))) // number of bitmaps
loff = duint32(livesym, loff, uint32(locals.n)) // number of bits in each bitmap
for _, live := range lv.livevars {
args.Clear()
locals.Clear()
onebitlivepointermap(lv, live, lv.vars, args, locals)
aoff = dbvec(argssym, aoff, args)
loff = dbvec(livesym, loff, locals)
}
finishgclocals(livesym)
finishgclocals(argssym)
}
// Entry pointer for liveness analysis. Solves for the liveness of
// pointer variables in the function and emits a runtime data
// structure read by the garbage collector.
// Returns a map from GC safe points to their corresponding stack map index.
func liveness(e *ssafn, f *ssa.Func, argssym, livesym *Sym) map[*ssa.Value]int {
// Change name to dump debugging information only for a specific function.
debugdelta := 0
if e.curfn.Func.Nname.Sym.Name == "!" {
debugdelta = 2
}
debuglive += debugdelta
// Construct the global liveness state.
vars := getvariables(e.curfn)
lv := newliveness(e.curfn, f, vars, e.stkptrsize)
// Run the dataflow framework.
livenessprologue(lv)
livenesssolve(lv)
livenessepilogue(lv)
livenesscompact(lv)
if debuglive >= 2 {
livenessprintdebug(lv)
}
// Emit the live pointer map data structures
livenessemit(lv, argssym, livesym)
debuglive -= debugdelta
return lv.stackMapIndex
}