go/src/cmd/compile/internal/gc/plive.go

1322 lines
37 KiB
Go
Raw Normal View History

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector liveness bitmap generation.
// The command line flag -live causes this code to print debug information.
// The levels are:
//
// -live (aka -live=1): print liveness lists as code warnings at safe points
// -live=2: print an assembly listing with liveness annotations
//
// Each level includes the earlier output as well.
package gc
import (
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/objabi"
"crypto/md5"
"fmt"
"strings"
)
// OpVarDef is an annotation for the liveness analysis, marking a place
// where a complete initialization (definition) of a variable begins.
// Since the liveness analysis can see initialization of single-word
// variables quite easily, OpVarDef is only needed for multi-word
// variables satisfying isfat(n.Type). For simplicity though, buildssa
// emits OpVarDef regardless of variable width.
//
// An 'OpVarDef x' annotation in the instruction stream tells the liveness
// analysis to behave as though the variable x is being initialized at that
// point in the instruction stream. The OpVarDef must appear before the
// actual (multi-instruction) initialization, and it must also appear after
// any uses of the previous value, if any. For example, if compiling:
//
// x = x[1:]
//
// it is important to generate code like:
//
// base, len, cap = pieces of x[1:]
// OpVarDef x
// x = {base, len, cap}
//
// If instead the generated code looked like:
//
// OpVarDef x
// base, len, cap = pieces of x[1:]
// x = {base, len, cap}
//
// then the liveness analysis would decide the previous value of x was
// unnecessary even though it is about to be used by the x[1:] computation.
// Similarly, if the generated code looked like:
//
// base, len, cap = pieces of x[1:]
// x = {base, len, cap}
// OpVarDef x
//
// then the liveness analysis will not preserve the new value of x, because
// the OpVarDef appears to have "overwritten" it.
//
// OpVarDef is a bit of a kludge to work around the fact that the instruction
// stream is working on single-word values but the liveness analysis
// wants to work on individual variables, which might be multi-word
// aggregates. It might make sense at some point to look into letting
// the liveness analysis work on single-word values as well, although
// there are complications around interface values, slices, and strings,
// all of which cannot be treated as individual words.
//
// OpVarKill is the opposite of OpVarDef: it marks a value as no longer needed,
// even if its address has been taken. That is, an OpVarKill annotation asserts
// that its argument is certainly dead, for use when the liveness analysis
// would not otherwise be able to deduce that fact.
// TODO: get rid of OpVarKill here. It's useful for stack frame allocation
// so the compiler can allocate two temps to the same location. Here it's now
// useless, since the implementation of stack objects.
// BlockEffects records, for a single SSA block, the liveness effects
// of that block.
type BlockEffects struct {
	// Local facts, filled in by Liveness.prologue from the contents
	// of the block alone:
	//
	//	uevar:   upward exposed variables (used before set in block)
	//	varkill: killed variables (set in block)
	uevar, varkill bvec

	// Flow facts, filled in by Liveness.solve using control flow
	// information:
	//
	//	livein:  variables live at block entry
	//	liveout: variables live at block exit
	livein, liveout bvec
}
// A collection of global state used by liveness analysis.
type Liveness struct {
fn *Node
f *ssa.Func
vars []*Node
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
idx map[*Node]int32
stkptrsize int64
be []BlockEffects
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
// allUnsafe indicates that all points in this function are
// unsafe-points.
allUnsafe bool
// unsafePoints bit i is set if Value ID i is an unsafe-point
// (preemption is not allowed). Only valid if !allUnsafe.
2018-02-26 20:48:53 -05:00
unsafePoints bvec
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
// An array with a bit vector for each safe point in the
// current Block during Liveness.epilogue. Indexed in Value
// order for that block. Additionally, for the entry block
// livevars[0] is the entry bitmap. Liveness.compact moves
// these to stackMaps.
livevars []bvec
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
// livenessMap maps from safe points (i.e., CALLs) to their
// liveness map indexes.
livenessMap LivenessMap
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
stackMapSet bvecSet
stackMaps []bvec
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
cache progeffectscache
}
// LivenessMap maps from *ssa.Value to LivenessIndex.
type LivenessMap struct {
vals map[ssa.ID]LivenessIndex
// The set of live, pointer-containing variables at the deferreturn
// call (only set when open-coded defers are used).
deferreturn LivenessIndex
cmd/compile: make LivenessMap dense Currently liveness information is kept in a map keyed by *ssa.Value. This made sense when liveness information was sparse, but now we have liveness for nearly every ssa.Value. There's a fair amount of memory and CPU overhead to this map now. This CL replaces this map with a slice indexed by value ID. Passes toolstash -cmp. name old time/op new time/op delta Template 197ms ± 1% 194ms ± 1% -1.60% (p=0.000 n=9+10) Unicode 100ms ± 2% 99ms ± 1% -1.31% (p=0.012 n=8+10) GoTypes 695ms ± 1% 689ms ± 0% -0.94% (p=0.000 n=10+10) Compiler 3.34s ± 2% 3.29s ± 1% -1.26% (p=0.000 n=10+9) SSA 8.08s ± 0% 8.02s ± 2% -0.70% (p=0.034 n=8+10) Flate 133ms ± 1% 131ms ± 1% -1.04% (p=0.006 n=10+9) GoParser 163ms ± 1% 162ms ± 1% -0.79% (p=0.034 n=8+10) Reflect 459ms ± 1% 454ms ± 0% -1.06% (p=0.000 n=10+8) Tar 186ms ± 1% 185ms ± 1% -0.87% (p=0.003 n=9+9) XML 238ms ± 1% 235ms ± 1% -1.01% (p=0.004 n=8+9) [Geo mean] 418ms 414ms -1.06% name old alloc/op new alloc/op delta Template 36.4MB ± 0% 35.6MB ± 0% -2.29% (p=0.000 n=9+10) Unicode 29.7MB ± 0% 29.5MB ± 0% -0.68% (p=0.000 n=10+10) GoTypes 119MB ± 0% 117MB ± 0% -2.30% (p=0.000 n=9+9) Compiler 546MB ± 0% 532MB ± 0% -2.47% (p=0.000 n=10+10) SSA 1.59GB ± 0% 1.55GB ± 0% -2.41% (p=0.000 n=10+10) Flate 24.9MB ± 0% 24.5MB ± 0% -1.77% (p=0.000 n=8+10) GoParser 29.5MB ± 0% 28.7MB ± 0% -2.60% (p=0.000 n=9+10) Reflect 81.7MB ± 0% 80.5MB ± 0% -1.49% (p=0.000 n=10+10) Tar 35.7MB ± 0% 35.1MB ± 0% -1.64% (p=0.000 n=10+10) XML 45.0MB ± 0% 43.7MB ± 0% -2.76% (p=0.000 n=9+10) [Geo mean] 80.1MB 78.4MB -2.04% name old allocs/op new allocs/op delta Template 336k ± 0% 335k ± 0% -0.31% (p=0.000 n=9+10) Unicode 339k ± 0% 339k ± 0% -0.05% (p=0.000 n=10+10) GoTypes 1.18M ± 0% 1.18M ± 0% -0.26% (p=0.000 n=10+10) Compiler 4.96M ± 0% 4.94M ± 0% -0.24% (p=0.000 n=10+10) SSA 12.6M ± 0% 12.5M ± 0% -0.30% (p=0.000 n=10+10) Flate 224k ± 0% 223k ± 0% -0.30% (p=0.000 n=10+10) GoParser 282k ± 0% 281k ± 0% -0.32% (p=0.000 n=10+10) Reflect 965k ± 
0% 963k ± 0% -0.27% (p=0.000 n=9+10) Tar 331k ± 0% 330k ± 0% -0.27% (p=0.000 n=10+10) XML 393k ± 0% 392k ± 0% -0.26% (p=0.000 n=10+10) [Geo mean] 763k 761k -0.26% Updates #24543. Change-Id: I4cfd2461510d3c026a262760bca225dc37482341 Reviewed-on: https://go-review.googlesource.com/110178 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-21 15:40:56 -04:00
}
func (m *LivenessMap) reset() {
if m.vals == nil {
m.vals = make(map[ssa.ID]LivenessIndex)
cmd/compile: make LivenessMap dense Currently liveness information is kept in a map keyed by *ssa.Value. This made sense when liveness information was sparse, but now we have liveness for nearly every ssa.Value. There's a fair amount of memory and CPU overhead to this map now. This CL replaces this map with a slice indexed by value ID. Passes toolstash -cmp. name old time/op new time/op delta Template 197ms ± 1% 194ms ± 1% -1.60% (p=0.000 n=9+10) Unicode 100ms ± 2% 99ms ± 1% -1.31% (p=0.012 n=8+10) GoTypes 695ms ± 1% 689ms ± 0% -0.94% (p=0.000 n=10+10) Compiler 3.34s ± 2% 3.29s ± 1% -1.26% (p=0.000 n=10+9) SSA 8.08s ± 0% 8.02s ± 2% -0.70% (p=0.034 n=8+10) Flate 133ms ± 1% 131ms ± 1% -1.04% (p=0.006 n=10+9) GoParser 163ms ± 1% 162ms ± 1% -0.79% (p=0.034 n=8+10) Reflect 459ms ± 1% 454ms ± 0% -1.06% (p=0.000 n=10+8) Tar 186ms ± 1% 185ms ± 1% -0.87% (p=0.003 n=9+9) XML 238ms ± 1% 235ms ± 1% -1.01% (p=0.004 n=8+9) [Geo mean] 418ms 414ms -1.06% name old alloc/op new alloc/op delta Template 36.4MB ± 0% 35.6MB ± 0% -2.29% (p=0.000 n=9+10) Unicode 29.7MB ± 0% 29.5MB ± 0% -0.68% (p=0.000 n=10+10) GoTypes 119MB ± 0% 117MB ± 0% -2.30% (p=0.000 n=9+9) Compiler 546MB ± 0% 532MB ± 0% -2.47% (p=0.000 n=10+10) SSA 1.59GB ± 0% 1.55GB ± 0% -2.41% (p=0.000 n=10+10) Flate 24.9MB ± 0% 24.5MB ± 0% -1.77% (p=0.000 n=8+10) GoParser 29.5MB ± 0% 28.7MB ± 0% -2.60% (p=0.000 n=9+10) Reflect 81.7MB ± 0% 80.5MB ± 0% -1.49% (p=0.000 n=10+10) Tar 35.7MB ± 0% 35.1MB ± 0% -1.64% (p=0.000 n=10+10) XML 45.0MB ± 0% 43.7MB ± 0% -2.76% (p=0.000 n=9+10) [Geo mean] 80.1MB 78.4MB -2.04% name old allocs/op new allocs/op delta Template 336k ± 0% 335k ± 0% -0.31% (p=0.000 n=9+10) Unicode 339k ± 0% 339k ± 0% -0.05% (p=0.000 n=10+10) GoTypes 1.18M ± 0% 1.18M ± 0% -0.26% (p=0.000 n=10+10) Compiler 4.96M ± 0% 4.94M ± 0% -0.24% (p=0.000 n=10+10) SSA 12.6M ± 0% 12.5M ± 0% -0.30% (p=0.000 n=10+10) Flate 224k ± 0% 223k ± 0% -0.30% (p=0.000 n=10+10) GoParser 282k ± 0% 281k ± 0% -0.32% (p=0.000 n=10+10) Reflect 965k ± 
0% 963k ± 0% -0.27% (p=0.000 n=9+10) Tar 331k ± 0% 330k ± 0% -0.27% (p=0.000 n=10+10) XML 393k ± 0% 392k ± 0% -0.26% (p=0.000 n=10+10) [Geo mean] 763k 761k -0.26% Updates #24543. Change-Id: I4cfd2461510d3c026a262760bca225dc37482341 Reviewed-on: https://go-review.googlesource.com/110178 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-21 15:40:56 -04:00
} else {
for k := range m.vals {
delete(m.vals, k)
}
cmd/compile: make LivenessMap dense Currently liveness information is kept in a map keyed by *ssa.Value. This made sense when liveness information was sparse, but now we have liveness for nearly every ssa.Value. There's a fair amount of memory and CPU overhead to this map now. This CL replaces this map with a slice indexed by value ID. Passes toolstash -cmp. name old time/op new time/op delta Template 197ms ± 1% 194ms ± 1% -1.60% (p=0.000 n=9+10) Unicode 100ms ± 2% 99ms ± 1% -1.31% (p=0.012 n=8+10) GoTypes 695ms ± 1% 689ms ± 0% -0.94% (p=0.000 n=10+10) Compiler 3.34s ± 2% 3.29s ± 1% -1.26% (p=0.000 n=10+9) SSA 8.08s ± 0% 8.02s ± 2% -0.70% (p=0.034 n=8+10) Flate 133ms ± 1% 131ms ± 1% -1.04% (p=0.006 n=10+9) GoParser 163ms ± 1% 162ms ± 1% -0.79% (p=0.034 n=8+10) Reflect 459ms ± 1% 454ms ± 0% -1.06% (p=0.000 n=10+8) Tar 186ms ± 1% 185ms ± 1% -0.87% (p=0.003 n=9+9) XML 238ms ± 1% 235ms ± 1% -1.01% (p=0.004 n=8+9) [Geo mean] 418ms 414ms -1.06% name old alloc/op new alloc/op delta Template 36.4MB ± 0% 35.6MB ± 0% -2.29% (p=0.000 n=9+10) Unicode 29.7MB ± 0% 29.5MB ± 0% -0.68% (p=0.000 n=10+10) GoTypes 119MB ± 0% 117MB ± 0% -2.30% (p=0.000 n=9+9) Compiler 546MB ± 0% 532MB ± 0% -2.47% (p=0.000 n=10+10) SSA 1.59GB ± 0% 1.55GB ± 0% -2.41% (p=0.000 n=10+10) Flate 24.9MB ± 0% 24.5MB ± 0% -1.77% (p=0.000 n=8+10) GoParser 29.5MB ± 0% 28.7MB ± 0% -2.60% (p=0.000 n=9+10) Reflect 81.7MB ± 0% 80.5MB ± 0% -1.49% (p=0.000 n=10+10) Tar 35.7MB ± 0% 35.1MB ± 0% -1.64% (p=0.000 n=10+10) XML 45.0MB ± 0% 43.7MB ± 0% -2.76% (p=0.000 n=9+10) [Geo mean] 80.1MB 78.4MB -2.04% name old allocs/op new allocs/op delta Template 336k ± 0% 335k ± 0% -0.31% (p=0.000 n=9+10) Unicode 339k ± 0% 339k ± 0% -0.05% (p=0.000 n=10+10) GoTypes 1.18M ± 0% 1.18M ± 0% -0.26% (p=0.000 n=10+10) Compiler 4.96M ± 0% 4.94M ± 0% -0.24% (p=0.000 n=10+10) SSA 12.6M ± 0% 12.5M ± 0% -0.30% (p=0.000 n=10+10) Flate 224k ± 0% 223k ± 0% -0.30% (p=0.000 n=10+10) GoParser 282k ± 0% 281k ± 0% -0.32% (p=0.000 n=10+10) Reflect 965k ± 
0% 963k ± 0% -0.27% (p=0.000 n=9+10) Tar 331k ± 0% 330k ± 0% -0.27% (p=0.000 n=10+10) XML 393k ± 0% 392k ± 0% -0.26% (p=0.000 n=10+10) [Geo mean] 763k 761k -0.26% Updates #24543. Change-Id: I4cfd2461510d3c026a262760bca225dc37482341 Reviewed-on: https://go-review.googlesource.com/110178 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-21 15:40:56 -04:00
}
m.deferreturn = LivenessDontCare
cmd/compile: make LivenessMap dense Currently liveness information is kept in a map keyed by *ssa.Value. This made sense when liveness information was sparse, but now we have liveness for nearly every ssa.Value. There's a fair amount of memory and CPU overhead to this map now. This CL replaces this map with a slice indexed by value ID. Passes toolstash -cmp. name old time/op new time/op delta Template 197ms ± 1% 194ms ± 1% -1.60% (p=0.000 n=9+10) Unicode 100ms ± 2% 99ms ± 1% -1.31% (p=0.012 n=8+10) GoTypes 695ms ± 1% 689ms ± 0% -0.94% (p=0.000 n=10+10) Compiler 3.34s ± 2% 3.29s ± 1% -1.26% (p=0.000 n=10+9) SSA 8.08s ± 0% 8.02s ± 2% -0.70% (p=0.034 n=8+10) Flate 133ms ± 1% 131ms ± 1% -1.04% (p=0.006 n=10+9) GoParser 163ms ± 1% 162ms ± 1% -0.79% (p=0.034 n=8+10) Reflect 459ms ± 1% 454ms ± 0% -1.06% (p=0.000 n=10+8) Tar 186ms ± 1% 185ms ± 1% -0.87% (p=0.003 n=9+9) XML 238ms ± 1% 235ms ± 1% -1.01% (p=0.004 n=8+9) [Geo mean] 418ms 414ms -1.06% name old alloc/op new alloc/op delta Template 36.4MB ± 0% 35.6MB ± 0% -2.29% (p=0.000 n=9+10) Unicode 29.7MB ± 0% 29.5MB ± 0% -0.68% (p=0.000 n=10+10) GoTypes 119MB ± 0% 117MB ± 0% -2.30% (p=0.000 n=9+9) Compiler 546MB ± 0% 532MB ± 0% -2.47% (p=0.000 n=10+10) SSA 1.59GB ± 0% 1.55GB ± 0% -2.41% (p=0.000 n=10+10) Flate 24.9MB ± 0% 24.5MB ± 0% -1.77% (p=0.000 n=8+10) GoParser 29.5MB ± 0% 28.7MB ± 0% -2.60% (p=0.000 n=9+10) Reflect 81.7MB ± 0% 80.5MB ± 0% -1.49% (p=0.000 n=10+10) Tar 35.7MB ± 0% 35.1MB ± 0% -1.64% (p=0.000 n=10+10) XML 45.0MB ± 0% 43.7MB ± 0% -2.76% (p=0.000 n=9+10) [Geo mean] 80.1MB 78.4MB -2.04% name old allocs/op new allocs/op delta Template 336k ± 0% 335k ± 0% -0.31% (p=0.000 n=9+10) Unicode 339k ± 0% 339k ± 0% -0.05% (p=0.000 n=10+10) GoTypes 1.18M ± 0% 1.18M ± 0% -0.26% (p=0.000 n=10+10) Compiler 4.96M ± 0% 4.94M ± 0% -0.24% (p=0.000 n=10+10) SSA 12.6M ± 0% 12.5M ± 0% -0.30% (p=0.000 n=10+10) Flate 224k ± 0% 223k ± 0% -0.30% (p=0.000 n=10+10) GoParser 282k ± 0% 281k ± 0% -0.32% (p=0.000 n=10+10) Reflect 965k ± 
0% 963k ± 0% -0.27% (p=0.000 n=9+10) Tar 331k ± 0% 330k ± 0% -0.27% (p=0.000 n=10+10) XML 393k ± 0% 392k ± 0% -0.26% (p=0.000 n=10+10) [Geo mean] 763k 761k -0.26% Updates #24543. Change-Id: I4cfd2461510d3c026a262760bca225dc37482341 Reviewed-on: https://go-review.googlesource.com/110178 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-21 15:40:56 -04:00
}
// set records i as the liveness index for v, keyed by v's ID.
// m.vals must already be allocated (reset does this); a store into a
// nil map panics.
func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
	id := v.ID
	m.vals[id] = i
}
func (m LivenessMap) Get(v *ssa.Value) LivenessIndex {
cmd/compile,runtime: stack maps only at calls, remove register maps Currently, we emit stack maps and register maps at almost every instruction. This was originally intended to support non-cooperative preemption, but was only ever used for debug call injection. Now debug call injection also uses conservative frame scanning. As a result, stack maps are only needed at call sites and register maps aren't needed at all except that we happen to also encode unsafe-point information in the register map PCDATA stream. This CL reduces stack maps to only appear at calls, and replace full register maps with just safe/unsafe-point information. This is all protected by the go115ReduceLiveness feature flag, which is defined in both runtime and cmd/compile. This CL significantly reduces binary sizes and also speeds up compiles and links: name old exe-bytes new exe-bytes delta BinGoSize 15.0MB ± 0% 14.1MB ± 0% -5.72% name old pcln-bytes new pcln-bytes delta BinGoSize 3.14MB ± 0% 2.48MB ± 0% -21.08% name old time/op new time/op delta Template 178ms ± 7% 172ms ±14% -3.59% (p=0.005 n=19+19) Unicode 71.0ms ±12% 69.8ms ±10% ~ (p=0.126 n=18+18) GoTypes 655ms ± 8% 615ms ± 8% -6.11% (p=0.000 n=19+19) Compiler 3.27s ± 6% 3.15s ± 7% -3.69% (p=0.001 n=20+20) SSA 7.10s ± 5% 6.85s ± 8% -3.53% (p=0.001 n=19+20) Flate 124ms ±15% 116ms ±22% -6.57% (p=0.024 n=18+19) GoParser 156ms ±26% 147ms ±34% ~ (p=0.070 n=19+19) Reflect 406ms ± 9% 387ms ±21% -4.69% (p=0.028 n=19+20) Tar 163ms ±15% 162ms ±27% ~ (p=0.370 n=19+19) XML 223ms ±13% 218ms ±14% ~ (p=0.157 n=20+20) LinkCompiler 503ms ±21% 484ms ±23% ~ (p=0.072 n=20+20) ExternalLinkCompiler 1.27s ± 7% 1.22s ± 8% -3.85% (p=0.005 n=20+19) LinkWithoutDebugCompiler 294ms ±17% 273ms ±11% -7.16% (p=0.001 n=19+18) (https://perf.golang.org/search?q=upload:20200428.8) The binary size improvement is even slightly better when you include the CLs leading up to this. 
Relative to the parent of "cmd/compile: mark PanicBounds/Extend as calls": name old exe-bytes new exe-bytes delta BinGoSize 15.0MB ± 0% 14.1MB ± 0% -6.18% name old pcln-bytes new pcln-bytes delta BinGoSize 3.22MB ± 0% 2.48MB ± 0% -22.92% (https://perf.golang.org/search?q=upload:20200428.9) For #36365. Change-Id: I69448e714f2a44430067ca97f6b78e08c0abed27 Reviewed-on: https://go-review.googlesource.com/c/go/+/230544 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-16 08:13:58 -04:00
// If v isn't in the map, then it's a "don't care" and not an
// unsafe-point.
if idx, ok := m.vals[v.ID]; ok {
return idx
}
return LivenessIndex{StackMapDontCare, false}
}
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
// LivenessIndex stores the liveness map information for a Value.
type LivenessIndex struct {
	// stackMapIndex is the stack map index for the Value, or
	// StackMapDontCare when the stack map index doesn't matter
	// (see StackMapValid).
	stackMapIndex int

	// isUnsafePoint indicates that this is an unsafe-point.
	//
	// Note that it's possible for a call Value to have a stack
	// map while also being an unsafe-point. This means it cannot
	// be preempted at this instruction, but that a preemption or
	// stack growth may happen in the called function.
	isUnsafePoint bool
}
// LivenessDontCare is the LivenessIndex to use where the liveness
// information doesn't matter. Currently it is used in deferreturn
// liveness when we don't actually need it. It should never be emitted
// to the PCDATA stream.
var LivenessDontCare = LivenessIndex{
	stackMapIndex: StackMapDontCare,
	isUnsafePoint: true,
}
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
// StackMapDontCare indicates that the stack map index at a Value
// doesn't matter.
//
// This is a sentinel value that should never be emitted to the PCDATA
// stream. We use -1000 because that's obviously never a valid stack
// index (but -1 is).
const StackMapDontCare = -1000
// StackMapValid reports whether idx carries a meaningful stack map
// index, that is, whether its stack map index is anything other than
// the StackMapDontCare sentinel.
func (idx LivenessIndex) StackMapValid() bool {
	dontCare := idx.stackMapIndex == StackMapDontCare
	return !dontCare
}
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
// progeffectscache holds per-function lists of variable indices that
// Liveness.initcache computes once and that are then shared
// function-wide.
type progeffectscache struct {
	// retuevar collects the indices (into the liveness variable list)
	// of the PPARAMOUT variables, which are live at every return
	// point. tailuevar collects the indices of the PPARAM variables,
	// which a tail return must read.
	retuevar, tailuevar []int32

	// initialized is set by initcache; initializing the cache a
	// second time is a fatal error.
	initialized bool
}
// livenessShouldTrack reports whether the liveness analysis
// should track the variable n.
//
// Only named local variables, parameters and results whose type
// contains pointers are tracked. That excludes:
//   - variables that have no pointers (which also covers empty structs),
//   - non-local variables,
//   - the fake PAUTOHEAP variables.
func livenessShouldTrack(n *Node) bool {
	if n.Op != ONAME {
		return false
	}
	switch n.Class() {
	case PAUTO, PPARAM, PPARAMOUT:
		// The class is trackable; the pointer check decides.
		return n.Type.HasPointers()
	}
	return false
}
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. 
The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
// getvariables returns the list of on-stack variables that we need to track
// and a map for looking up indices by *Node.
func getvariables(fn *Node) ([]*Node, map[*Node]int32) {
var vars []*Node
for _, n := range fn.Func.Dcl {
if livenessShouldTrack(n) {
vars = append(vars, n)
}
}
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
idx := make(map[*Node]int32, len(vars))
for i, n := range vars {
idx[n] = int32(i)
}
return vars, idx
}
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
func (lv *Liveness) initcache() {
if lv.cache.initialized {
Fatalf("liveness cache initialized twice")
return
}
lv.cache.initialized = true
for i, node := range lv.vars {
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k 
± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch node.Class() {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
case PPARAM:
// A return instruction with a p.to is a tail return, which brings
// the stack pointer back up (if it ever went down) and then jumps
// to a new function entirely. That form of instruction must read
// all the parameters for correctness, and similarly it must not
// read the out arguments - they won't be set until the new
// function runs.
lv.cache.tailuevar = append(lv.cache.tailuevar, int32(i))
case PPARAMOUT:
// All results are live at every return point.
// Note that this point is after escaping return values
// are copied back to the stack using their PAUTOHEAP references.
lv.cache.retuevar = append(lv.cache.retuevar, int32(i))
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
}
}
}
// A liveEffect is a bit set describing the liveness effects that a
// single instruction has on one variable.
//
// When both flags are set, the kill happens after the use (as for an
// instruction that updates a value, for example).
type liveEffect int

const (
	// uevar: the variable is used by the instruction.
	uevar liveEffect = 1 << 0
	// varkill: the variable is killed (set) by the instruction.
	varkill liveEffect = 1 << 1
)
// valueEffects returns the index of a variable in lv.vars and the
// liveness effects v has on that variable.
// If v does not affect any tracked variables, it returns -1, 0.
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
func (lv *Liveness) valueEffects(v *ssa.Value) (int32, liveEffect) {
n, e := affectedNode(v)
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
if e == 0 || n == nil || n.Op != ONAME { // cheapest checks first
return -1, 0
}
// AllocFrame has dropped unused variables from
// lv.fn.Func.Dcl, but they might still be referenced by
// OpVarFoo pseudo-ops. Ignore them to prevent "lost track of
// variable" ICEs (issue 19632).
switch v.Op {
case ssa.OpVarDef, ssa.OpVarKill, ssa.OpVarLive, ssa.OpKeepAlive:
if !n.Name.Used() {
return -1, 0
}
}
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
var effect liveEffect
// Read is a read, obviously.
//
// Addr is a read also, as any subsequent holder of the pointer must be able
// to see all the values (including initialization) written so far.
// This also prevents a variable from "coming back from the dead" and presenting
// stale pointers to the garbage collector. See issue 28445.
if e&(ssa.SymRead|ssa.SymAddr) != 0 {
effect |= uevar
}
if e&ssa.SymWrite != 0 && (!isfat(n.Type) || v.Op == ssa.OpVarDef) {
effect |= varkill
}
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
if effect == 0 {
return -1, 0
}
if pos, ok := lv.idx[n]; ok {
return pos, effect
}
return -1, 0
}
// affectedNode returns the *Node affected by v together with the
// ssa.SymEffect describing how v affects it.
//
// The returned node is nil when v.Op has no symbol effect (the effect
// is then 0) or when v.Aux is nil or an *obj.LSym. Any other aux type
// is reported through Fatalf.
func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
	// Special cases: ops whose node and effect are chosen explicitly
	// rather than derived from v.Op.SymEffect and v.Aux.
	switch v.Op {
	case ssa.OpLoadReg, ssa.OpKeepAlive:
		// Both resolve their node via AutoVar on the first argument
		// and count as a read.
		node, _ := AutoVar(v.Args[0])
		return node, ssa.SymRead
	case ssa.OpStoreReg:
		node, _ := AutoVar(v)
		return node, ssa.SymWrite
	case ssa.OpVarLive:
		return v.Aux.(*Node), ssa.SymRead
	case ssa.OpVarDef, ssa.OpVarKill:
		return v.Aux.(*Node), ssa.SymWrite
	}

	eff := v.Op.SymEffect()
	if eff == 0 {
		return nil, 0
	}

	switch aux := v.Aux.(type) {
	case *Node:
		return aux, eff
	case nil, *obj.LSym:
		// ok, but no node
		return nil, eff
	}

	Fatalf("weird aux: %s", v.LongString())
	return nil, eff
}
cmd/compile: reuse liveness structures Currently liveness analysis is a significant source of allocations in the compiler. This CL mitigates this by moving the main sources of allocation to the ssa.Cache, allowing them to be reused between different liveness runs. Passes toolstash -cmp. name old time/op new time/op delta Template 194ms ± 1% 193ms ± 1% ~ (p=0.156 n=10+9) Unicode 99.1ms ± 1% 99.3ms ± 2% ~ (p=0.853 n=10+10) GoTypes 689ms ± 0% 687ms ± 0% -0.27% (p=0.022 n=10+9) Compiler 3.29s ± 1% 3.30s ± 1% ~ (p=0.489 n=9+9) SSA 8.02s ± 2% 7.97s ± 1% -0.71% (p=0.011 n=10+10) Flate 131ms ± 1% 130ms ± 1% -0.59% (p=0.043 n=9+10) GoParser 162ms ± 1% 160ms ± 1% -1.53% (p=0.000 n=10+10) Reflect 454ms ± 0% 454ms ± 0% ~ (p=0.959 n=8+8) Tar 185ms ± 1% 185ms ± 2% ~ (p=0.905 n=9+10) XML 235ms ± 1% 232ms ± 1% -1.15% (p=0.001 n=9+10) [Geo mean] 414ms 412ms -0.39% name old alloc/op new alloc/op delta Template 35.6MB ± 0% 34.2MB ± 0% -3.75% (p=0.000 n=10+10) Unicode 29.5MB ± 0% 29.4MB ± 0% -0.26% (p=0.000 n=10+9) GoTypes 117MB ± 0% 112MB ± 0% -3.78% (p=0.000 n=9+10) Compiler 532MB ± 0% 512MB ± 0% -3.80% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.48GB ± 0% -4.82% (p=0.000 n=10+10) Flate 24.5MB ± 0% 23.6MB ± 0% -3.61% (p=0.000 n=10+9) GoParser 28.7MB ± 0% 27.7MB ± 0% -3.43% (p=0.000 n=10+10) Reflect 80.5MB ± 0% 78.1MB ± 0% -2.96% (p=0.000 n=10+10) Tar 35.1MB ± 0% 33.9MB ± 0% -3.49% (p=0.000 n=10+10) XML 43.7MB ± 0% 42.4MB ± 0% -3.05% (p=0.000 n=10+10) [Geo mean] 78.4MB 75.8MB -3.30% name old allocs/op new allocs/op delta Template 335k ± 0% 335k ± 0% -0.12% (p=0.000 n=10+10) Unicode 339k ± 0% 339k ± 0% -0.01% (p=0.001 n=10+10) GoTypes 1.18M ± 0% 1.17M ± 0% -0.12% (p=0.000 n=10+10) Compiler 4.94M ± 0% 4.94M ± 0% -0.06% (p=0.000 n=10+10) SSA 12.5M ± 0% 12.5M ± 0% -0.07% (p=0.000 n=10+10) Flate 223k ± 0% 223k ± 0% -0.11% (p=0.000 n=10+10) GoParser 281k ± 0% 281k ± 0% -0.08% (p=0.000 n=10+10) Reflect 963k ± 0% 960k ± 0% -0.23% (p=0.000 n=10+9) Tar 330k ± 0% 330k ± 0% -0.12% (p=0.000 n=10+10) XML 
392k ± 0% 392k ± 0% -0.08% (p=0.000 n=10+10) [Geo mean] 761k 760k -0.10% Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps, plus these optimizations, is: name old time/op new time/op delta Template 186ms ± 1% 194ms ± 1% +4.41% (p=0.000 n=9+10) Unicode 96.5ms ± 1% 99.1ms ± 1% +2.76% (p=0.000 n=9+10) GoTypes 659ms ± 1% 689ms ± 0% +4.56% (p=0.000 n=9+10) Compiler 3.14s ± 2% 3.29s ± 1% +4.95% (p=0.000 n=9+9) SSA 7.68s ± 3% 8.02s ± 2% +4.41% (p=0.000 n=10+10) Flate 126ms ± 0% 131ms ± 1% +4.14% (p=0.000 n=10+9) GoParser 153ms ± 1% 162ms ± 1% +5.90% (p=0.000 n=10+10) Reflect 436ms ± 1% 454ms ± 0% +4.14% (p=0.000 n=10+8) Tar 177ms ± 1% 185ms ± 1% +4.28% (p=0.000 n=8+9) XML 224ms ± 1% 235ms ± 1% +5.23% (p=0.000 n=10+9) [Geo mean] 396ms 414ms +4.47% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 35.6MB ± 0% +3.24% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.5MB ± 0% +0.51% (p=0.000 n=9+10) GoTypes 113MB ± 0% 117MB ± 0% +3.31% (p=0.000 n=8+9) Compiler 509MB ± 0% 532MB ± 0% +4.46% (p=0.000 n=10+10) SSA 1.49GB ± 0% 1.55GB ± 0% +4.10% (p=0.000 n=10+10) Flate 23.8MB ± 0% 24.5MB ± 0% +2.92% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 28.7MB ± 0% +2.88% (p=0.000 n=10+10) Reflect 77.4MB ± 0% 80.5MB ± 0% +4.01% (p=0.000 n=10+10) Tar 34.1MB ± 0% 35.1MB ± 0% +3.12% (p=0.000 n=10+10) XML 42.6MB ± 0% 43.7MB ± 0% +2.65% (p=0.000 n=10+10) [Geo mean] 76.1MB 78.4MB +3.11% name old allocs/op new allocs/op delta Template 320k ± 0% 335k ± 0% +4.60% (p=0.000 n=10+10) Unicode 336k ± 0% 339k ± 0% +0.96% (p=0.000 n=9+10) GoTypes 1.12M ± 0% 1.18M ± 0% +4.55% (p=0.000 n=10+10) Compiler 4.66M ± 0% 4.94M ± 0% +6.18% (p=0.000 n=10+10) SSA 11.9M ± 0% 12.5M ± 0% +5.37% (p=0.000 n=10+10) Flate 214k ± 0% 223k ± 0% +4.15% (p=0.000 n=9+10) GoParser 270k ± 0% 281k ± 0% +4.15% (p=0.000 n=10+10) Reflect 921k ± 0% 963k ± 0% +4.49% (p=0.000 n=10+10) Tar 317k ± 0% 330k ± 0% +4.25% (p=0.000 n=10+10) XML 375k ± 
0% 392k ± 0% +4.75% (p=0.000 n=10+10) [Geo mean] 729k 761k +4.34% Updates #24543. Change-Id: Ia951fdb3c17ae1c156e1d05fc42e69caba33c91a Reviewed-on: https://go-review.googlesource.com/110179 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2018-04-21 16:15:41 -04:00
// livenessFuncCache holds liveness structures that are stored in
// f.Cache.Liveness so that their allocations can be reused by later
// liveness runs (see newliveness).
type livenessFuncCache struct {
// be is the cached block-effects slice; newliveness reslices it for
// lv.be when its capacity is at least f.NumBlocks().
be []BlockEffects
// livenessMap is the cached liveness map; newliveness takes over its
// vals field for the new Liveness and sets the cached vals to nil.
livenessMap LivenessMap
}
// newliveness constructs a new Liveness structure used to hold the global
// state of the liveness computation for fn. The f argument is the *ssa.Func
// being analyzed, vars is the slice of tracked *Nodes, and idx maps each of
// those nodes to its index in vars.
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkptrsize int64) *Liveness {
lv := &Liveness{
fn: fn,
f: f,
vars: vars,
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
idx: idx,
stkptrsize: stkptrsize,
}
cmd/compile: reuse liveness structures Currently liveness analysis is a significant source of allocations in the compiler. This CL mitigates this by moving the main sources of allocation to the ssa.Cache, allowing them to be reused between different liveness runs. Passes toolstash -cmp. name old time/op new time/op delta Template 194ms ± 1% 193ms ± 1% ~ (p=0.156 n=10+9) Unicode 99.1ms ± 1% 99.3ms ± 2% ~ (p=0.853 n=10+10) GoTypes 689ms ± 0% 687ms ± 0% -0.27% (p=0.022 n=10+9) Compiler 3.29s ± 1% 3.30s ± 1% ~ (p=0.489 n=9+9) SSA 8.02s ± 2% 7.97s ± 1% -0.71% (p=0.011 n=10+10) Flate 131ms ± 1% 130ms ± 1% -0.59% (p=0.043 n=9+10) GoParser 162ms ± 1% 160ms ± 1% -1.53% (p=0.000 n=10+10) Reflect 454ms ± 0% 454ms ± 0% ~ (p=0.959 n=8+8) Tar 185ms ± 1% 185ms ± 2% ~ (p=0.905 n=9+10) XML 235ms ± 1% 232ms ± 1% -1.15% (p=0.001 n=9+10) [Geo mean] 414ms 412ms -0.39% name old alloc/op new alloc/op delta Template 35.6MB ± 0% 34.2MB ± 0% -3.75% (p=0.000 n=10+10) Unicode 29.5MB ± 0% 29.4MB ± 0% -0.26% (p=0.000 n=10+9) GoTypes 117MB ± 0% 112MB ± 0% -3.78% (p=0.000 n=9+10) Compiler 532MB ± 0% 512MB ± 0% -3.80% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.48GB ± 0% -4.82% (p=0.000 n=10+10) Flate 24.5MB ± 0% 23.6MB ± 0% -3.61% (p=0.000 n=10+9) GoParser 28.7MB ± 0% 27.7MB ± 0% -3.43% (p=0.000 n=10+10) Reflect 80.5MB ± 0% 78.1MB ± 0% -2.96% (p=0.000 n=10+10) Tar 35.1MB ± 0% 33.9MB ± 0% -3.49% (p=0.000 n=10+10) XML 43.7MB ± 0% 42.4MB ± 0% -3.05% (p=0.000 n=10+10) [Geo mean] 78.4MB 75.8MB -3.30% name old allocs/op new allocs/op delta Template 335k ± 0% 335k ± 0% -0.12% (p=0.000 n=10+10) Unicode 339k ± 0% 339k ± 0% -0.01% (p=0.001 n=10+10) GoTypes 1.18M ± 0% 1.17M ± 0% -0.12% (p=0.000 n=10+10) Compiler 4.94M ± 0% 4.94M ± 0% -0.06% (p=0.000 n=10+10) SSA 12.5M ± 0% 12.5M ± 0% -0.07% (p=0.000 n=10+10) Flate 223k ± 0% 223k ± 0% -0.11% (p=0.000 n=10+10) GoParser 281k ± 0% 281k ± 0% -0.08% (p=0.000 n=10+10) Reflect 963k ± 0% 960k ± 0% -0.23% (p=0.000 n=10+9) Tar 330k ± 0% 330k ± 0% -0.12% (p=0.000 n=10+10) XML 
392k ± 0% 392k ± 0% -0.08% (p=0.000 n=10+10) [Geo mean] 761k 760k -0.10% Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps, plus these optimizations, is: name old time/op new time/op delta Template 186ms ± 1% 194ms ± 1% +4.41% (p=0.000 n=9+10) Unicode 96.5ms ± 1% 99.1ms ± 1% +2.76% (p=0.000 n=9+10) GoTypes 659ms ± 1% 689ms ± 0% +4.56% (p=0.000 n=9+10) Compiler 3.14s ± 2% 3.29s ± 1% +4.95% (p=0.000 n=9+9) SSA 7.68s ± 3% 8.02s ± 2% +4.41% (p=0.000 n=10+10) Flate 126ms ± 0% 131ms ± 1% +4.14% (p=0.000 n=10+9) GoParser 153ms ± 1% 162ms ± 1% +5.90% (p=0.000 n=10+10) Reflect 436ms ± 1% 454ms ± 0% +4.14% (p=0.000 n=10+8) Tar 177ms ± 1% 185ms ± 1% +4.28% (p=0.000 n=8+9) XML 224ms ± 1% 235ms ± 1% +5.23% (p=0.000 n=10+9) [Geo mean] 396ms 414ms +4.47% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 35.6MB ± 0% +3.24% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.5MB ± 0% +0.51% (p=0.000 n=9+10) GoTypes 113MB ± 0% 117MB ± 0% +3.31% (p=0.000 n=8+9) Compiler 509MB ± 0% 532MB ± 0% +4.46% (p=0.000 n=10+10) SSA 1.49GB ± 0% 1.55GB ± 0% +4.10% (p=0.000 n=10+10) Flate 23.8MB ± 0% 24.5MB ± 0% +2.92% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 28.7MB ± 0% +2.88% (p=0.000 n=10+10) Reflect 77.4MB ± 0% 80.5MB ± 0% +4.01% (p=0.000 n=10+10) Tar 34.1MB ± 0% 35.1MB ± 0% +3.12% (p=0.000 n=10+10) XML 42.6MB ± 0% 43.7MB ± 0% +2.65% (p=0.000 n=10+10) [Geo mean] 76.1MB 78.4MB +3.11% name old allocs/op new allocs/op delta Template 320k ± 0% 335k ± 0% +4.60% (p=0.000 n=10+10) Unicode 336k ± 0% 339k ± 0% +0.96% (p=0.000 n=9+10) GoTypes 1.12M ± 0% 1.18M ± 0% +4.55% (p=0.000 n=10+10) Compiler 4.66M ± 0% 4.94M ± 0% +6.18% (p=0.000 n=10+10) SSA 11.9M ± 0% 12.5M ± 0% +5.37% (p=0.000 n=10+10) Flate 214k ± 0% 223k ± 0% +4.15% (p=0.000 n=9+10) GoParser 270k ± 0% 281k ± 0% +4.15% (p=0.000 n=10+10) Reflect 921k ± 0% 963k ± 0% +4.49% (p=0.000 n=10+10) Tar 317k ± 0% 330k ± 0% +4.25% (p=0.000 n=10+10) XML 375k ± 
0% 392k ± 0% +4.75% (p=0.000 n=10+10) [Geo mean] 729k 761k +4.34% Updates #24543. Change-Id: Ia951fdb3c17ae1c156e1d05fc42e69caba33c91a Reviewed-on: https://go-review.googlesource.com/110179 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2018-04-21 16:15:41 -04:00
// Significant sources of allocation are kept in the ssa.Cache
// and reused. Surprisingly, the bit vectors themselves aren't
// a major source of allocation, but the liveness maps are.
cmd/compile: reuse liveness structures Currently liveness analysis is a significant source of allocations in the compiler. This CL mitigates this by moving the main sources of allocation to the ssa.Cache, allowing them to be reused between different liveness runs. Passes toolstash -cmp. name old time/op new time/op delta Template 194ms ± 1% 193ms ± 1% ~ (p=0.156 n=10+9) Unicode 99.1ms ± 1% 99.3ms ± 2% ~ (p=0.853 n=10+10) GoTypes 689ms ± 0% 687ms ± 0% -0.27% (p=0.022 n=10+9) Compiler 3.29s ± 1% 3.30s ± 1% ~ (p=0.489 n=9+9) SSA 8.02s ± 2% 7.97s ± 1% -0.71% (p=0.011 n=10+10) Flate 131ms ± 1% 130ms ± 1% -0.59% (p=0.043 n=9+10) GoParser 162ms ± 1% 160ms ± 1% -1.53% (p=0.000 n=10+10) Reflect 454ms ± 0% 454ms ± 0% ~ (p=0.959 n=8+8) Tar 185ms ± 1% 185ms ± 2% ~ (p=0.905 n=9+10) XML 235ms ± 1% 232ms ± 1% -1.15% (p=0.001 n=9+10) [Geo mean] 414ms 412ms -0.39% name old alloc/op new alloc/op delta Template 35.6MB ± 0% 34.2MB ± 0% -3.75% (p=0.000 n=10+10) Unicode 29.5MB ± 0% 29.4MB ± 0% -0.26% (p=0.000 n=10+9) GoTypes 117MB ± 0% 112MB ± 0% -3.78% (p=0.000 n=9+10) Compiler 532MB ± 0% 512MB ± 0% -3.80% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.48GB ± 0% -4.82% (p=0.000 n=10+10) Flate 24.5MB ± 0% 23.6MB ± 0% -3.61% (p=0.000 n=10+9) GoParser 28.7MB ± 0% 27.7MB ± 0% -3.43% (p=0.000 n=10+10) Reflect 80.5MB ± 0% 78.1MB ± 0% -2.96% (p=0.000 n=10+10) Tar 35.1MB ± 0% 33.9MB ± 0% -3.49% (p=0.000 n=10+10) XML 43.7MB ± 0% 42.4MB ± 0% -3.05% (p=0.000 n=10+10) [Geo mean] 78.4MB 75.8MB -3.30% name old allocs/op new allocs/op delta Template 335k ± 0% 335k ± 0% -0.12% (p=0.000 n=10+10) Unicode 339k ± 0% 339k ± 0% -0.01% (p=0.001 n=10+10) GoTypes 1.18M ± 0% 1.17M ± 0% -0.12% (p=0.000 n=10+10) Compiler 4.94M ± 0% 4.94M ± 0% -0.06% (p=0.000 n=10+10) SSA 12.5M ± 0% 12.5M ± 0% -0.07% (p=0.000 n=10+10) Flate 223k ± 0% 223k ± 0% -0.11% (p=0.000 n=10+10) GoParser 281k ± 0% 281k ± 0% -0.08% (p=0.000 n=10+10) Reflect 963k ± 0% 960k ± 0% -0.23% (p=0.000 n=10+9) Tar 330k ± 0% 330k ± 0% -0.12% (p=0.000 n=10+10) XML 
392k ± 0% 392k ± 0% -0.08% (p=0.000 n=10+10) [Geo mean] 761k 760k -0.10% Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps, plus these optimizations, is: name old time/op new time/op delta Template 186ms ± 1% 194ms ± 1% +4.41% (p=0.000 n=9+10) Unicode 96.5ms ± 1% 99.1ms ± 1% +2.76% (p=0.000 n=9+10) GoTypes 659ms ± 1% 689ms ± 0% +4.56% (p=0.000 n=9+10) Compiler 3.14s ± 2% 3.29s ± 1% +4.95% (p=0.000 n=9+9) SSA 7.68s ± 3% 8.02s ± 2% +4.41% (p=0.000 n=10+10) Flate 126ms ± 0% 131ms ± 1% +4.14% (p=0.000 n=10+9) GoParser 153ms ± 1% 162ms ± 1% +5.90% (p=0.000 n=10+10) Reflect 436ms ± 1% 454ms ± 0% +4.14% (p=0.000 n=10+8) Tar 177ms ± 1% 185ms ± 1% +4.28% (p=0.000 n=8+9) XML 224ms ± 1% 235ms ± 1% +5.23% (p=0.000 n=10+9) [Geo mean] 396ms 414ms +4.47% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 35.6MB ± 0% +3.24% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.5MB ± 0% +0.51% (p=0.000 n=9+10) GoTypes 113MB ± 0% 117MB ± 0% +3.31% (p=0.000 n=8+9) Compiler 509MB ± 0% 532MB ± 0% +4.46% (p=0.000 n=10+10) SSA 1.49GB ± 0% 1.55GB ± 0% +4.10% (p=0.000 n=10+10) Flate 23.8MB ± 0% 24.5MB ± 0% +2.92% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 28.7MB ± 0% +2.88% (p=0.000 n=10+10) Reflect 77.4MB ± 0% 80.5MB ± 0% +4.01% (p=0.000 n=10+10) Tar 34.1MB ± 0% 35.1MB ± 0% +3.12% (p=0.000 n=10+10) XML 42.6MB ± 0% 43.7MB ± 0% +2.65% (p=0.000 n=10+10) [Geo mean] 76.1MB 78.4MB +3.11% name old allocs/op new allocs/op delta Template 320k ± 0% 335k ± 0% +4.60% (p=0.000 n=10+10) Unicode 336k ± 0% 339k ± 0% +0.96% (p=0.000 n=9+10) GoTypes 1.12M ± 0% 1.18M ± 0% +4.55% (p=0.000 n=10+10) Compiler 4.66M ± 0% 4.94M ± 0% +6.18% (p=0.000 n=10+10) SSA 11.9M ± 0% 12.5M ± 0% +5.37% (p=0.000 n=10+10) Flate 214k ± 0% 223k ± 0% +4.15% (p=0.000 n=9+10) GoParser 270k ± 0% 281k ± 0% +4.15% (p=0.000 n=10+10) Reflect 921k ± 0% 963k ± 0% +4.49% (p=0.000 n=10+10) Tar 317k ± 0% 330k ± 0% +4.25% (p=0.000 n=10+10) XML 375k ± 
0% 392k ± 0% +4.75% (p=0.000 n=10+10) [Geo mean] 729k 761k +4.34% Updates #24543. Change-Id: Ia951fdb3c17ae1c156e1d05fc42e69caba33c91a Reviewed-on: https://go-review.googlesource.com/110179 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2018-04-21 16:15:41 -04:00
if lc, _ := f.Cache.Liveness.(*livenessFuncCache); lc == nil {
// Prep the cache so liveness can fill it later.
f.Cache.Liveness = new(livenessFuncCache)
} else {
if cap(lc.be) >= f.NumBlocks() {
lv.be = lc.be[:f.NumBlocks()]
}
lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessDontCare}
lc.livenessMap.vals = nil
cmd/compile: reuse liveness structures Currently liveness analysis is a significant source of allocations in the compiler. This CL mitigates this by moving the main sources of allocation to the ssa.Cache, allowing them to be reused between different liveness runs. Passes toolstash -cmp. name old time/op new time/op delta Template 194ms ± 1% 193ms ± 1% ~ (p=0.156 n=10+9) Unicode 99.1ms ± 1% 99.3ms ± 2% ~ (p=0.853 n=10+10) GoTypes 689ms ± 0% 687ms ± 0% -0.27% (p=0.022 n=10+9) Compiler 3.29s ± 1% 3.30s ± 1% ~ (p=0.489 n=9+9) SSA 8.02s ± 2% 7.97s ± 1% -0.71% (p=0.011 n=10+10) Flate 131ms ± 1% 130ms ± 1% -0.59% (p=0.043 n=9+10) GoParser 162ms ± 1% 160ms ± 1% -1.53% (p=0.000 n=10+10) Reflect 454ms ± 0% 454ms ± 0% ~ (p=0.959 n=8+8) Tar 185ms ± 1% 185ms ± 2% ~ (p=0.905 n=9+10) XML 235ms ± 1% 232ms ± 1% -1.15% (p=0.001 n=9+10) [Geo mean] 414ms 412ms -0.39% name old alloc/op new alloc/op delta Template 35.6MB ± 0% 34.2MB ± 0% -3.75% (p=0.000 n=10+10) Unicode 29.5MB ± 0% 29.4MB ± 0% -0.26% (p=0.000 n=10+9) GoTypes 117MB ± 0% 112MB ± 0% -3.78% (p=0.000 n=9+10) Compiler 532MB ± 0% 512MB ± 0% -3.80% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.48GB ± 0% -4.82% (p=0.000 n=10+10) Flate 24.5MB ± 0% 23.6MB ± 0% -3.61% (p=0.000 n=10+9) GoParser 28.7MB ± 0% 27.7MB ± 0% -3.43% (p=0.000 n=10+10) Reflect 80.5MB ± 0% 78.1MB ± 0% -2.96% (p=0.000 n=10+10) Tar 35.1MB ± 0% 33.9MB ± 0% -3.49% (p=0.000 n=10+10) XML 43.7MB ± 0% 42.4MB ± 0% -3.05% (p=0.000 n=10+10) [Geo mean] 78.4MB 75.8MB -3.30% name old allocs/op new allocs/op delta Template 335k ± 0% 335k ± 0% -0.12% (p=0.000 n=10+10) Unicode 339k ± 0% 339k ± 0% -0.01% (p=0.001 n=10+10) GoTypes 1.18M ± 0% 1.17M ± 0% -0.12% (p=0.000 n=10+10) Compiler 4.94M ± 0% 4.94M ± 0% -0.06% (p=0.000 n=10+10) SSA 12.5M ± 0% 12.5M ± 0% -0.07% (p=0.000 n=10+10) Flate 223k ± 0% 223k ± 0% -0.11% (p=0.000 n=10+10) GoParser 281k ± 0% 281k ± 0% -0.08% (p=0.000 n=10+10) Reflect 963k ± 0% 960k ± 0% -0.23% (p=0.000 n=10+9) Tar 330k ± 0% 330k ± 0% -0.12% (p=0.000 n=10+10) XML 
392k ± 0% 392k ± 0% -0.08% (p=0.000 n=10+10) [Geo mean] 761k 760k -0.10% Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps, plus these optimizations, is: name old time/op new time/op delta Template 186ms ± 1% 194ms ± 1% +4.41% (p=0.000 n=9+10) Unicode 96.5ms ± 1% 99.1ms ± 1% +2.76% (p=0.000 n=9+10) GoTypes 659ms ± 1% 689ms ± 0% +4.56% (p=0.000 n=9+10) Compiler 3.14s ± 2% 3.29s ± 1% +4.95% (p=0.000 n=9+9) SSA 7.68s ± 3% 8.02s ± 2% +4.41% (p=0.000 n=10+10) Flate 126ms ± 0% 131ms ± 1% +4.14% (p=0.000 n=10+9) GoParser 153ms ± 1% 162ms ± 1% +5.90% (p=0.000 n=10+10) Reflect 436ms ± 1% 454ms ± 0% +4.14% (p=0.000 n=10+8) Tar 177ms ± 1% 185ms ± 1% +4.28% (p=0.000 n=8+9) XML 224ms ± 1% 235ms ± 1% +5.23% (p=0.000 n=10+9) [Geo mean] 396ms 414ms +4.47% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 35.6MB ± 0% +3.24% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.5MB ± 0% +0.51% (p=0.000 n=9+10) GoTypes 113MB ± 0% 117MB ± 0% +3.31% (p=0.000 n=8+9) Compiler 509MB ± 0% 532MB ± 0% +4.46% (p=0.000 n=10+10) SSA 1.49GB ± 0% 1.55GB ± 0% +4.10% (p=0.000 n=10+10) Flate 23.8MB ± 0% 24.5MB ± 0% +2.92% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 28.7MB ± 0% +2.88% (p=0.000 n=10+10) Reflect 77.4MB ± 0% 80.5MB ± 0% +4.01% (p=0.000 n=10+10) Tar 34.1MB ± 0% 35.1MB ± 0% +3.12% (p=0.000 n=10+10) XML 42.6MB ± 0% 43.7MB ± 0% +2.65% (p=0.000 n=10+10) [Geo mean] 76.1MB 78.4MB +3.11% name old allocs/op new allocs/op delta Template 320k ± 0% 335k ± 0% +4.60% (p=0.000 n=10+10) Unicode 336k ± 0% 339k ± 0% +0.96% (p=0.000 n=9+10) GoTypes 1.12M ± 0% 1.18M ± 0% +4.55% (p=0.000 n=10+10) Compiler 4.66M ± 0% 4.94M ± 0% +6.18% (p=0.000 n=10+10) SSA 11.9M ± 0% 12.5M ± 0% +5.37% (p=0.000 n=10+10) Flate 214k ± 0% 223k ± 0% +4.15% (p=0.000 n=9+10) GoParser 270k ± 0% 281k ± 0% +4.15% (p=0.000 n=10+10) Reflect 921k ± 0% 963k ± 0% +4.49% (p=0.000 n=10+10) Tar 317k ± 0% 330k ± 0% +4.25% (p=0.000 n=10+10) XML 375k ± 
0% 392k ± 0% +4.75% (p=0.000 n=10+10) [Geo mean] 729k 761k +4.34% Updates #24543. Change-Id: Ia951fdb3c17ae1c156e1d05fc42e69caba33c91a Reviewed-on: https://go-review.googlesource.com/110179 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2018-04-21 16:15:41 -04:00
}
if lv.be == nil {
lv.be = make([]BlockEffects, f.NumBlocks())
}
nblocks := int32(len(f.Blocks))
nvars := int32(len(vars))
bulk := bvbulkalloc(nvars, nblocks*7)
for _, b := range f.Blocks {
be := lv.blockEffects(b)
be.uevar = bulk.next()
be.varkill = bulk.next()
be.livein = bulk.next()
be.liveout = bulk.next()
}
lv.livenessMap.reset()
2018-02-26 20:48:53 -05:00
lv.markUnsafePoints()
return lv
}
// blockEffects returns a pointer to the BlockEffects entry that lv keeps
// for block b. Entries live in lv.be and are indexed by the block's ID.
func (lv *Liveness) blockEffects(b *ssa.Block) *BlockEffects {
	effects := &lv.be[b.ID]
	return effects
}
// NOTE: The bitmap for a specific type t could be cached in t after
// the first run and then simply copied into bv at the correct offset
// on future calls with the same type t.
func onebitwalktype1(t *types.Type, off int64, bv bvec) {
if t.Align > 0 && off&int64(t.Align-1) != 0 {
cmd/compile: shrink liveness maps The GC maps don't care about trailing non-pointers in args. Work harder to eliminate them. This should provide a slight speedup to everything that reads these maps, mainly GC and stack copying. The non-ptr-y runtime benchmarks happen to go from having a non-empty args map to an empty args map, so they have a significant speedup. name old time/op new time/op delta StackCopyPtr-8 80.2ms ± 4% 79.7ms ± 2% -0.63% (p=0.001 n=94+91) StackCopy-8 63.3ms ± 3% 59.2ms ± 3% -6.45% (p=0.000 n=98+97) StackCopyNoCache-8 107ms ± 3% 98ms ± 3% -8.00% (p=0.000 n=95+88) It also shrinks object files a tiny bit: name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.03% (p=0.008 n=5+5) Unicode 218kB ± 0% 218kB ± 0% -0.09% (p=0.008 n=5+5) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.03% (p=0.008 n=5+5) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.06% (p=0.008 n=5+5) SSA 15.9MB ± 0% 15.9MB ± 0% -0.06% (p=0.008 n=5+5) Flate 304kB ± 0% 303kB ± 0% -0.29% (p=0.008 n=5+5) GoParser 370kB ± 0% 370kB ± 0% +0.02% (p=0.008 n=5+5) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.07% (p=0.008 n=5+5) Tar 421kB ± 0% 421kB ± 0% -0.05% (p=0.008 n=5+5) XML 518kB ± 0% 517kB ± 0% -0.06% (p=0.008 n=5+5) [Geo mean] 934kB 933kB -0.07% Note that some object files do grow; this can happen because some maps that were duplicates of each others must be stored separately. Change-Id: Ie076891bd8e9d269ff2ff5435d5d25c721e0e31d Reviewed-on: https://go-review.googlesource.com/104175 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2018-04-02 14:21:27 -07:00
Fatalf("onebitwalktype1: invalid initial alignment: type %v has alignment %d, but offset is %v", t, t.Align, off)
}
if !t.HasPointers() {
// Note: this case ensures that pointers to go:notinheap types
// are not considered pointers by garbage collection and stack copying.
return
}
switch t.Etype {
case TPTR, TUNSAFEPTR, TFUNC, TCHAN, TMAP:
if off&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid alignment, %v", t)
}
bv.Set(int32(off / int64(Widthptr))) // pointer
case TSTRING:
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) 
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 
143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × 
(0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% 
(p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// struct { byte *str; intgo len; }
if off&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid alignment, %v", t)
}
bv.Set(int32(off / int64(Widthptr))) //pointer in first slot
case TINTER:
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) 
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 
143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × 
(0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% 
(p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// struct { Itab *tab; void *data; }
// or, when isnilinter(t)==true:
// struct { Type *type; void *data; }
if off&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid alignment, %v", t)
}
// The first word of an interface is a pointer, but we don't
// treat it as such.
// 1. If it is a non-empty interface, the pointer points to an itab
// which is always in persistentalloc space.
// 2. If it is an empty interface, the pointer points to a _type.
// a. If it is a compile-time-allocated type, it points into
// the read-only data section.
// b. If it is a reflect-allocated type, it points into the Go heap.
// Reflect is responsible for keeping a reference to
// the underlying type so it won't be GCd.
// If we ever have a moving GC, we need to change this for 2b (as
// well as scan itabs to update their itab._type fields).
bv.Set(int32(off/int64(Widthptr) + 1)) // pointer in second slot
case TSLICE:
// struct { byte *array; uintgo len; uintgo cap; }
if off&int64(Widthptr-1) != 0 {
Fatalf("onebitwalktype1: invalid TARRAY alignment, %v", t)
}
bv.Set(int32(off / int64(Widthptr))) // pointer in first slot (BitsPointer)
case TARRAY:
elt := t.Elem()
if elt.Width == 0 {
// Short-circuit for #20739.
break
}
for i := int64(0); i < t.NumElem(); i++ {
onebitwalktype1(elt, off, bv)
off += elt.Width
}
case TSTRUCT:
for _, f := range t.Fields().Slice() {
onebitwalktype1(f.Type, off+f.Offset, bv)
}
default:
Fatalf("onebitwalktype1: unexpected type, %v", t)
}
}
// Generates live pointer value maps for arguments and local variables. The
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
func (lv *Liveness) pointerMap(liveout bvec, vars []*Node, args, locals bvec) {
for i := int32(0); ; i++ {
i = liveout.Next(i)
if i < 0 {
break
}
node := vars[i]
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k 
± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch node.Class() {
case PAUTO:
onebitwalktype1(node.Type, node.Xoffset+lv.stkptrsize, locals)
case PPARAM, PPARAMOUT:
onebitwalktype1(node.Type, node.Xoffset, args)
}
}
}
// allUnsafe indicates that all points in this function are
// unsafe-points.
func allUnsafe(f *ssa.Func) bool {
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
// The runtime assumes the only safe-points are function
// prologues (because that's how it used to be). We could and
// should improve that, but for now keep consider all points
// in the runtime unsafe. obj will add prologues and their
// safe-points.
//
// go:nosplit functions are similar. Since safe points used to
// be coupled with stack checks, go:nosplit often actually
// means "no safe points in this function".
return compiling_runtime || f.NoSplit
}
// markUnsafePoints finds unsafe points and computes lv.unsafePoints.
func (lv *Liveness) markUnsafePoints() {
if allUnsafe(lv.f) {
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
// No complex analysis necessary.
lv.allUnsafe = true
2018-02-26 20:48:53 -05:00
return
}
lv.unsafePoints = bvalloc(int32(lv.f.NumValues()))
// Mark architecture-specific unsafe points.
for _, b := range lv.f.Blocks {
for _, v := range b.Values {
if v.Op.UnsafePoint() {
lv.unsafePoints.Set(int32(v.ID))
}
}
}
2018-02-26 20:48:53 -05:00
// Mark write barrier unsafe points.
for _, wbBlock := range lv.f.WBLoads {
if wbBlock.Kind == ssa.BlockPlain && len(wbBlock.Values) == 0 {
// The write barrier block was optimized away
// but we haven't done dead block elimination.
// (This can happen in -N mode.)
continue
}
2018-02-26 20:48:53 -05:00
// Check that we have the expected diamond shape.
if len(wbBlock.Succs) != 2 {
lv.f.Fatalf("expected branch at write barrier block %v", wbBlock)
}
s0, s1 := wbBlock.Succs[0].Block(), wbBlock.Succs[1].Block()
if s0 == s1 {
// There's no difference between write barrier on and off.
// Thus there's no unsafe locations. See issue 26024.
continue
}
2018-02-26 20:48:53 -05:00
if s0.Kind != ssa.BlockPlain || s1.Kind != ssa.BlockPlain {
lv.f.Fatalf("expected successors of write barrier block %v to be plain", wbBlock)
}
if s0.Succs[0].Block() != s1.Succs[0].Block() {
lv.f.Fatalf("expected successors of write barrier block %v to converge", wbBlock)
}
// Flow backwards from the control value to find the
// flag load. We don't know what lowered ops we're
// looking for, but all current arches produce a
// single op that does the memory load from the flag
// address, so we look for that.
var load *ssa.Value
cmd/compile: allow multiple SSA block control values Control values are used to choose which successor of a block is jumped to. Typically a control value takes the form of a 'flags' value that represents the result of a comparison. Some architectures however use a variable in a register as a control value. Up until now we have managed with a single control value per block. However some architectures (e.g. s390x and riscv64) have combined compare-and-branch instructions that take two variables in registers as parameters. To generate these instructions we need to support 2 control values per block. This CL allows up to 2 control values to be used in a block in order to support the addition of compare-and-branch instructions. I have implemented s390x compare-and-branch instructions in a different CL. Passes toolstash-check -all. Results of compilebench: name old time/op new time/op delta Template 208ms ± 1% 209ms ± 1% ~ (p=0.289 n=20+20) Unicode 83.7ms ± 1% 83.3ms ± 3% -0.49% (p=0.017 n=18+18) GoTypes 748ms ± 1% 748ms ± 0% ~ (p=0.460 n=20+18) Compiler 3.47s ± 1% 3.48s ± 1% ~ (p=0.070 n=19+18) SSA 11.5s ± 1% 11.7s ± 1% +1.64% (p=0.000 n=19+18) Flate 130ms ± 1% 130ms ± 1% ~ (p=0.588 n=19+20) GoParser 160ms ± 1% 161ms ± 1% ~ (p=0.211 n=20+20) Reflect 465ms ± 1% 467ms ± 1% +0.42% (p=0.007 n=20+20) Tar 184ms ± 1% 185ms ± 2% ~ (p=0.087 n=18+20) XML 253ms ± 1% 253ms ± 1% ~ (p=0.377 n=20+18) LinkCompiler 769ms ± 2% 774ms ± 2% ~ (p=0.070 n=19+19) ExternalLinkCompiler 3.59s ±11% 3.68s ± 6% ~ (p=0.072 n=20+20) LinkWithoutDebugCompiler 446ms ± 5% 454ms ± 3% +1.79% (p=0.002 n=19+20) StdCmd 26.0s ± 2% 26.0s ± 2% ~ (p=0.799 n=20+20) name old user-time/op new user-time/op delta Template 238ms ± 5% 240ms ± 5% ~ (p=0.142 n=20+20) Unicode 105ms ±11% 106ms ±10% ~ (p=0.512 n=20+20) GoTypes 876ms ± 2% 873ms ± 4% ~ (p=0.647 n=20+19) Compiler 4.17s ± 2% 4.19s ± 1% ~ (p=0.093 n=20+18) SSA 13.9s ± 1% 14.1s ± 1% +1.45% (p=0.000 n=18+18) Flate 145ms ±13% 146ms ± 5% ~ (p=0.851 n=20+18) GoParser 
185ms ± 5% 188ms ± 7% ~ (p=0.174 n=20+20) Reflect 534ms ± 3% 538ms ± 2% ~ (p=0.105 n=20+18) Tar 215ms ± 4% 211ms ± 9% ~ (p=0.079 n=19+20) XML 295ms ± 6% 295ms ± 5% ~ (p=0.968 n=20+20) LinkCompiler 832ms ± 4% 837ms ± 7% ~ (p=0.707 n=17+20) ExternalLinkCompiler 1.58s ± 8% 1.60s ± 4% ~ (p=0.296 n=20+19) LinkWithoutDebugCompiler 478ms ±12% 489ms ±10% ~ (p=0.429 n=20+20) name old object-bytes new object-bytes delta Template 559kB ± 0% 559kB ± 0% ~ (all equal) Unicode 216kB ± 0% 216kB ± 0% ~ (all equal) GoTypes 2.03MB ± 0% 2.03MB ± 0% ~ (all equal) Compiler 8.07MB ± 0% 8.07MB ± 0% -0.06% (p=0.000 n=20+20) SSA 27.1MB ± 0% 27.3MB ± 0% +0.89% (p=0.000 n=20+20) Flate 343kB ± 0% 343kB ± 0% ~ (all equal) GoParser 441kB ± 0% 441kB ± 0% ~ (all equal) Reflect 1.36MB ± 0% 1.36MB ± 0% ~ (all equal) Tar 487kB ± 0% 487kB ± 0% ~ (all equal) XML 632kB ± 0% 632kB ± 0% ~ (all equal) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 110kB ± 0% +0.72% (p=0.000 n=20+20) SSA 137kB ± 0% 138kB ± 0% +0.58% (p=0.000 n=20+20) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 761kB ± 0% 761kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.13MB ± 0% 1.13MB ± 0% ~ (all equal) CmdGoSize 15.1MB ± 0% 15.1MB ± 0% ~ (all equal) Change-Id: I3cc2f9829a109543d9a68be4a21775d2d3e9801f Reviewed-on: 
https://go-review.googlesource.com/c/go/+/196557 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Keith Randall <khr@golang.org>
2019-08-12 20:19:58 +01:00
v := wbBlock.Controls[0]
2018-02-26 20:48:53 -05:00
for {
if sym, ok := v.Aux.(*obj.LSym); ok && sym == writeBarrier {
load = v
break
}
switch v.Op {
case ssa.Op386TESTL:
// 386 lowers Neq32 to (TESTL cond cond),
if v.Args[0] == v.Args[1] {
v = v.Args[0]
continue
}
case ssa.Op386MOVLload, ssa.OpARM64MOVWUload, ssa.OpPPC64MOVWZload, ssa.OpWasmI64Load32U:
2018-02-26 20:48:53 -05:00
// Args[0] is the address of the write
// barrier control. Ignore Args[1],
// which is the mem operand.
// TODO: Just ignore mem operands?
2018-02-26 20:48:53 -05:00
v = v.Args[0]
continue
}
// Common case: just flow backwards.
if len(v.Args) != 1 {
v.Fatalf("write barrier control value has more than one argument: %s", v.LongString())
}
v = v.Args[0]
}
// Mark everything after the load unsafe.
found := false
for _, v := range wbBlock.Values {
found = found || v == load
if found {
lv.unsafePoints.Set(int32(v.ID))
}
}
// Mark the two successor blocks unsafe. These come
// back together immediately after the direct write in
// one successor and the last write barrier call in
// the other, so there's no need to be more precise.
for _, succ := range wbBlock.Succs {
for _, v := range succ.Block().Values {
lv.unsafePoints.Set(int32(v.ID))
}
}
}
// Find uintptr -> unsafe.Pointer conversions and flood
// unsafeness back to a call (which is always a safe point).
//
// Looking for the uintptr -> unsafe.Pointer conversion has a
// few advantages over looking for unsafe.Pointer -> uintptr
// conversions:
//
// 1. We avoid needlessly blocking safe-points for
// unsafe.Pointer -> uintptr conversions that never go back to
// a Pointer.
//
// 2. We don't have to detect calls to reflect.Value.Pointer,
// reflect.Value.UnsafeAddr, and reflect.Value.InterfaceData,
// which are implicit unsafe.Pointer -> uintptr conversions.
// We can't even reliably detect this if there's an indirect
// call to one of these methods.
//
// TODO: For trivial unsafe.Pointer arithmetic, it would be
// nice to only flood as far as the unsafe.Pointer -> uintptr
// conversion, but it's hard to know which argument of an Add
// or Sub to follow.
var flooded bvec
var flood func(b *ssa.Block, vi int)
flood = func(b *ssa.Block, vi int) {
if flooded.n == 0 {
flooded = bvalloc(int32(lv.f.NumBlocks()))
}
if flooded.Get(int32(b.ID)) {
return
}
for i := vi - 1; i >= 0; i-- {
v := b.Values[i]
if v.Op.IsCall() {
// Uintptrs must not contain live
// pointers across calls, so stop
// flooding.
return
}
lv.unsafePoints.Set(int32(v.ID))
}
if vi == len(b.Values) {
// We marked all values in this block, so no
// need to flood this block again.
flooded.Set(int32(b.ID))
}
for _, pred := range b.Preds {
flood(pred.Block(), len(pred.Block().Values))
}
}
for _, b := range lv.f.Blocks {
for i, v := range b.Values {
if !(v.Op == ssa.OpConvert && v.Type.IsPtrShaped()) {
continue
}
// Flood the unsafe-ness of this backwards
// until we hit a call.
flood(b, i+1)
}
}
}
// Returns true for instructions that must have a stack map.
//
// This does not necessarily mean the instruction is a safe-point. In
// particular, call Values can have a stack map in case the callee
// grows the stack, but not themselves be a safe-point.
func (lv *Liveness) hasStackMap(v *ssa.Value) bool {
if !v.Op.IsCall() {
return false
2018-02-26 20:48:53 -05:00
}
// typedmemclr and typedmemmove are write barriers and
// deeply non-preemptible. They are unsafe points and
// hence should not have liveness maps.
if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) {
2018-02-26 20:48:53 -05:00
return false
}
return true
}
// Initializes the sets for solving the live variables. Visits all the
// instructions in each basic block to summarizes the information at each basic
// block
func (lv *Liveness) prologue() {
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
lv.initcache()
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
// Walk the block instructions backward and update the block
// effects with the each prog effects.
for j := len(b.Values) - 1; j >= 0; j-- {
pos, e := lv.valueEffects(b.Values[j])
if e&varkill != 0 {
be.varkill.Set(pos)
be.uevar.Unset(pos)
cmd/compile: make liveness more efficient When the number of variables in a function is very large, liveness analysis gets less efficient, since every bit vector is O(number of variables). Improve the situation by returning a sparse representation from progeffects. In all scenarios, progeffects either returns a slice that is shared function-wide, and which is usually small, or a slice that is guaranteed to have at most three values. Reduces compilation time for the code in #8225 Comment 1 by ~10%. Minor effects on regular packages (below). Passes toolstash -cmp. Updates #8225 name old time/op new time/op delta Template 215ms ± 2% 212ms ± 4% -1.31% (p=0.001 n=30+30) Unicode 98.3ms ± 3% 98.4ms ± 5% ~ (p=0.971 n=30+30) GoTypes 657ms ± 3% 651ms ± 2% -0.98% (p=0.001 n=30+27) Compiler 2.78s ± 2% 2.77s ± 2% -0.60% (p=0.006 n=30+30) Flate 130ms ± 4% 130ms ± 4% ~ (p=0.712 n=29+30) GoParser 159ms ± 5% 158ms ± 3% ~ (p=0.331 n=29+30) Reflect 406ms ± 3% 404ms ± 3% -0.69% (p=0.041 n=29+30) Tar 117ms ± 4% 117ms ± 3% ~ (p=0.886 n=30+29) XML 219ms ± 2% 217ms ± 2% ~ (p=0.091 n=29+24) name old user-ns/op new user-ns/op delta Template 272user-ms ± 3% 270user-ms ± 3% -1.03% (p=0.004 n=30+30) Unicode 138user-ms ± 2% 138user-ms ± 3% ~ (p=0.902 n=29+29) GoTypes 891user-ms ± 2% 883user-ms ± 2% -0.95% (p=0.000 n=29+29) Compiler 3.85user-s ± 2% 3.84user-s ± 2% ~ (p=0.236 n=30+30) Flate 167user-ms ± 2% 166user-ms ± 4% ~ (p=0.511 n=28+30) GoParser 211user-ms ± 4% 210user-ms ± 3% ~ (p=0.287 n=29+30) Reflect 539user-ms ± 3% 536user-ms ± 2% -0.59% (p=0.034 n=29+30) Tar 154user-ms ± 3% 155user-ms ± 4% ~ (p=0.786 n=30+30) XML 289user-ms ± 3% 288user-ms ± 4% ~ (p=0.249 n=30+26) name old alloc/op new alloc/op delta Template 40.7MB ± 0% 40.8MB ± 0% +0.09% (p=0.001 n=30+30) Unicode 30.8MB ± 0% 30.8MB ± 0% ~ (p=0.112 n=30+30) GoTypes 123MB ± 0% 124MB ± 0% +0.09% (p=0.000 n=30+30) Compiler 473MB ± 0% 473MB ± 0% +0.05% (p=0.000 n=30+30) Flate 26.5MB ± 0% 26.5MB ± 0% ~ (p=0.186 n=29+30) GoParser 32.3MB ± 
0% 32.4MB ± 0% +0.07% (p=0.021 n=28+30) Reflect 84.4MB ± 0% 84.6MB ± 0% +0.21% (p=0.000 n=30+30) Tar 27.3MB ± 0% 27.3MB ± 0% +0.09% (p=0.010 n=30+28) XML 44.7MB ± 0% 44.7MB ± 0% +0.07% (p=0.002 n=30+30) name old allocs/op new allocs/op delta Template 401k ± 1% 400k ± 1% ~ (p=0.321 n=30+30) Unicode 331k ± 1% 331k ± 1% ~ (p=0.357 n=30+28) GoTypes 1.24M ± 0% 1.24M ± 1% -0.19% (p=0.001 n=30+30) Compiler 4.27M ± 0% 4.27M ± 0% -0.13% (p=0.000 n=30+30) Flate 252k ± 1% 251k ± 1% -0.30% (p=0.005 n=30+30) GoParser 325k ± 1% 325k ± 1% ~ (p=0.224 n=28+30) Reflect 1.06M ± 0% 1.05M ± 0% -0.34% (p=0.000 n=30+30) Tar 266k ± 1% 266k ± 1% ~ (p=0.333 n=30+30) XML 416k ± 1% 415k ± 1% ~ (p=0.144 n=30+29) Change-Id: I6ba67a9203516373062a2618122306da73333d98 Reviewed-on: https://go-review.googlesource.com/36211 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-01-14 23:43:26 -08:00
}
if e&uevar != 0 {
be.uevar.Set(pos)
}
}
}
}
// solve iterates the liveness dataflow equations to a fixed point,
// filling in the livein and liveout sets of every block.
func (lv *Liveness) solve() {
	// Scratch bit vectors, allocated once so the loop below does not
	// allocate and free on every iteration.
	size := int32(len(lv.vars))
	inScratch := bvalloc(size)
	outScratch := bvalloc(size)

	// Visiting blocks in postorder improves convergence.
	order := lv.f.Postorder()

	// Sweep over all blocks round-robin until a full sweep changes no
	// liveout set. A work queue might be slightly faster, but the
	// number of sweeps is so low that it hardly seems worth the
	// complexity.
	for {
		changed := false
		for _, blk := range order {
			eff := lv.blockEffects(blk)

			outScratch.Clear()
			switch blk.Kind {
			case ssa.BlockExit:
				// panic exit - nothing to do
			case ssa.BlockRet:
				for _, idx := range lv.cache.retuevar {
					outScratch.Set(idx)
				}
			case ssa.BlockRetJmp:
				for _, idx := range lv.cache.tailuevar {
					outScratch.Set(idx)
				}
			default:
				// A variable is live on output from this block
				// if it is live on input to some successor.
				//
				// out[b] = \bigcup_{s \in succ[b]} in[s]
				succs := blk.Succs
				outScratch.Copy(lv.blockEffects(succs[0].Block()).livein)
				for _, edge := range succs[1:] {
					outScratch.Or(outScratch, lv.blockEffects(edge.Block()).livein)
				}
			}

			if !eff.liveout.Eq(outScratch) {
				eff.liveout.Copy(outScratch)
				changed = true
			}

			// A variable is live on input to this block if the
			// block uses it, or if it is live on output from the
			// block and the block does not set it.
			//
			// in[b] = uevar[b] \cup (out[b] \setminus varkill[b])
			inScratch.AndNot(eff.liveout, eff.varkill)
			eff.livein.Or(inScratch, eff.uevar)
		}
		if !changed {
			break
		}
	}
}
// Visits all instructions in a basic block and computes a bit vector of live
// variables at each safe point location.
func (lv *Liveness) epilogue() {
nvars := int32(len(lv.vars))
liveout := bvalloc(nvars)
livedefer := bvalloc(nvars) // always-live variables
// If there is a defer (that could recover), then all output
// parameters are live all the time. In addition, any locals
// that are pointers to heap-allocated output parameters are
// also always live (post-deferreturn code needs these
// pointers to copy values back to the stack).
// TODO: if the output parameter is heap-allocated, then we
// don't need to keep the stack copy live?
if lv.fn.Func.HasDefer() {
for i, n := range lv.vars {
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k 
± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if n.Class() == PPARAMOUT {
if n.Name.IsOutputParamHeapAddr() {
// Just to be paranoid. Heap addresses are PAUTOs.
Fatalf("variable %v both output param and heap output param", n)
}
if n.Name.Param.Heapaddr != nil {
// If this variable moved to the heap, then
// its stack copy is not live.
continue
}
// Note: zeroing is handled by zeroResults in walk.go.
livedefer.Set(int32(i))
}
if n.Name.IsOutputParamHeapAddr() {
// This variable will be overwritten early in the function
// prologue (from the result of a mallocgc) but we need to
// zero it in case that malloc causes a stack scan.
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.Name.SetNeedzero(true)
livedefer.Set(int32(i))
}
if n.Name.OpenDeferSlot() {
// Open-coded defer args slots must be live
// everywhere in a function, since a panic can
// occur (almost) anywhere. Because it is live
// everywhere, it must be zeroed on entry.
livedefer.Set(int32(i))
// It was already marked as Needzero when created.
if !n.Name.Needzero() {
Fatalf("all pointer-containing defer arg slots should have Needzero set")
}
}
}
}
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
// We must analyze the entry block first. The runtime assumes
// the function entry map is index 0. Conveniently, layout
// already ensured that the entry block is first.
if lv.f.Entry != lv.f.Blocks[0] {
lv.f.Fatalf("entry block must be first")
}
{
// Reserve an entry for function entry.
live := bvalloc(nvars)
lv.livevars = append(lv.livevars, live)
}
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
// Walk forward through the basic block instructions and
// allocate liveness maps for those instructions that need them.
for _, v := range b.Values {
if !lv.hasStackMap(v) {
continue
}
live := bvalloc(nvars)
lv.livevars = append(lv.livevars, live)
}
// walk backward, construct maps at each safe point
index := int32(len(lv.livevars) - 1)
liveout.Copy(be.liveout)
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if lv.hasStackMap(v) {
// Found an interesting instruction, record the
// corresponding liveness information.
live := &lv.livevars[index]
live.Or(*live, liveout)
live.Or(*live, livedefer) // only for non-entry safe points
index--
}
// Update liveness information.
pos, e := lv.valueEffects(v)
if e&varkill != 0 {
liveout.Unset(pos)
}
if e&uevar != 0 {
liveout.Set(pos)
}
}
if b == lv.f.Entry {
if index != 0 {
Fatalf("bad index for entry point: %v", index)
}
// Check to make sure only input variables are live.
for i, n := range lv.vars {
if !liveout.Get(int32(i)) {
continue
}
if n.Class() == PPARAM {
continue // ok
}
Fatalf("bad live variable at entry of %v: %L", lv.fn.Func.Nname, n)
}
// Record live variables.
live := &lv.livevars[index]
live.Or(*live, liveout)
}
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
// The liveness maps for this block are now complete. Compact them.
lv.compact(b)
}
// If we have an open-coded deferreturn call, make a liveness map for it.
if lv.fn.Func.OpenCodedDeferDisallowed() {
lv.livenessMap.deferreturn = LivenessDontCare
} else {
lv.livenessMap.deferreturn = LivenessIndex{
stackMapIndex: lv.stackMapSet.add(livedefer),
isUnsafePoint: false,
}
}
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
// Done compacting. Throw out the stack map set.
lv.stackMaps = lv.stackMapSet.extractUniqe()
lv.stackMapSet = bvecSet{}
// Useful sanity check: on entry to the function,
// the only things that can possibly be live are the
// input parameters.
for j, n := range lv.vars {
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
if n.Class() != PPARAM && lv.stackMaps[0].Get(int32(j)) {
lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n)
}
}
}
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
// Compact coalesces identical bitmaps from lv.livevars into the sets
// lv.stackMapSet.
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
//
// Compact clears lv.livevars.
//
// There are actually two lists of bitmaps, one list for the local variables and one
// list for the function arguments. Both lists are indexed by the same PCDATA
// index, so the corresponding pairs must be considered together when
// merging duplicates. The argument bitmaps change much less often during
// function execution than the local variable bitmaps, so it is possible that
// we could introduce a separate PCDATA index for arguments vs locals and
// then compact the set of argument bitmaps separately from the set of
// local variable bitmaps. As of 2014-04-02, doing this to the godoc binary
// is actually a net loss: we save about 50k of argument bitmaps but the new
// PCDATA tables cost about 100k. So for now we keep using a single index for
// both bitmap lists.
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
func (lv *Liveness) compact(b *ssa.Block) {
pos := 0
if b == lv.f.Entry {
// Handle entry stack map.
lv.stackMapSet.add(lv.livevars[0])
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
pos++
}
for _, v := range b.Values {
hasStackMap := lv.hasStackMap(v)
isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
idx := LivenessIndex{StackMapDontCare, isUnsafePoint}
if hasStackMap {
idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos])
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
pos++
}
if hasStackMap || isUnsafePoint {
lv.livenessMap.set(v, idx)
}
}
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
// Reset livevars.
lv.livevars = lv.livevars[:0]
}
func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
if debuglive == 0 || lv.fn.funcname() == "init" || strings.HasPrefix(lv.fn.funcname(), ".") {
return
}
2018-02-26 20:48:53 -05:00
if !(v == nil || v.Op.IsCall()) {
// Historically we only printed this information at
// calls. Keep doing so.
return
}
if live.IsEmpty() {
return
}
pos := lv.fn.Func.Nname.Pos
if v != nil {
pos = v.Pos
}
s := "live at "
if v == nil {
s += fmt.Sprintf("entry to %s:", lv.fn.funcname())
} else if sym, ok := v.Aux.(*ssa.AuxCall); ok && sym.Fn != nil {
fn := sym.Fn.Name
if pos := strings.Index(fn, "."); pos >= 0 {
fn = fn[pos+1:]
}
s += fmt.Sprintf("call to %s:", fn)
} else {
s += "indirect call:"
}
for j, n := range lv.vars {
if live.Get(int32(j)) {
s += fmt.Sprintf(" %v", n)
}
}
Warnl(pos, s)
}
// printbvec prints "name=" followed by the comma-separated symbol names of
// the variables whose bits are set in live. The output is preceded by a tab
// when printed is false and by a space otherwise. If live is empty, nothing
// is printed and printed is returned unchanged; otherwise printbvec
// returns true.
func (lv *Liveness) printbvec(printed bool, name string, live bvec) bool {
	if live.IsEmpty() {
		return printed
	}

	lead := "\t"
	if printed {
		lead = " "
	}
	fmt.Printf("%s%s=", lead, name)

	sep := ""
	for i, n := range lv.vars {
		if live.Get(int32(i)) {
			fmt.Printf("%s%s", sep, n.Sym.Name)
			sep = ","
		}
	}
	return true
}
// printeffect is like printbvec, but for valueEffects: when x is true it
// prints "name=" followed by the symbol name of lv.vars[pos], preceded by
// a tab when printed is false and by a space otherwise, and returns true.
// When x is false it prints nothing and returns printed unchanged.
func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool) bool {
	if !x {
		return printed
	}

	lead := " "
	if !printed {
		lead = "\t"
	}
	fmt.Printf("%s%s=", lead, name)
	// x is known to be true here, so the variable name is always printed.
	fmt.Printf("%s", lv.vars[pos].Sym.Name)
	return true
}
// Prints the computed liveness information and inputs, for debugging.
// This format synthesizes the information used during the multiple passes
// into a single presentation.
func (lv *Liveness) printDebug() {
fmt.Printf("liveness: %s\n", lv.fn.funcname())
for i, b := range lv.f.Blocks {
if i > 0 {
fmt.Printf("\n")
}
// bb#0 pred=1,2 succ=3,4
fmt.Printf("bb#%d pred=", b.ID)
for j, pred := range b.Preds {
if j > 0 {
fmt.Printf(",")
}
fmt.Printf("%d", pred.Block().ID)
}
fmt.Printf(" succ=")
for j, succ := range b.Succs {
if j > 0 {
fmt.Printf(",")
}
fmt.Printf("%d", succ.Block().ID)
}
fmt.Printf("\n")
be := lv.blockEffects(b)
// initial settings
printed := false
printed = lv.printbvec(printed, "uevar", be.uevar)
printed = lv.printbvec(printed, "livein", be.livein)
if printed {
fmt.Printf("\n")
}
// program listing, with individual effects listed
if b == lv.f.Entry {
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
live := lv.stackMaps[0]
fmt.Printf("(%s) function entry\n", linestr(lv.fn.Func.Nname.Pos))
fmt.Printf("\tlive=")
printed = false
for j, n := range lv.vars {
if !live.Get(int32(j)) {
continue
}
if printed {
fmt.Printf(",")
}
fmt.Printf("%v", n)
printed = true
}
fmt.Printf("\n")
}
for _, v := range b.Values {
fmt.Printf("(%s) %v\n", linestr(v.Pos), v.LongString())
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
pcdata := lv.livenessMap.Get(v)
pos, effect := lv.valueEffects(v)
printed = false
printed = lv.printeffect(printed, "uevar", pos, effect&uevar != 0)
printed = lv.printeffect(printed, "varkill", pos, effect&varkill != 0)
if printed {
fmt.Printf("\n")
}
if pcdata.StackMapValid() {
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
fmt.Printf("\tlive=")
printed = false
if pcdata.StackMapValid() {
live := lv.stackMaps[pcdata.stackMapIndex]
for j, n := range lv.vars {
if !live.Get(int32(j)) {
continue
}
if printed {
fmt.Printf(",")
}
fmt.Printf("%v", n)
printed = true
}
}
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
fmt.Printf("\n")
}
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
if pcdata.isUnsafePoint {
fmt.Printf("\tunsafe-point\n")
}
}
// bb bitsets
fmt.Printf("end\n")
printed = false
printed = lv.printbvec(printed, "varkill", be.varkill)
printed = lv.printbvec(printed, "liveout", be.liveout)
if printed {
fmt.Printf("\n")
}
}
fmt.Printf("\n")
}
// Dumps a slice of bitmaps to a symbol as a sequence of uint32 values. The
// first word dumped is the total number of bitmaps. The second word is the
// length of the bitmaps. All bitmaps are assumed to be of equal length. The
// remaining bytes are the raw bitmaps.
func (lv *Liveness) emit() (argsSym, liveSym *obj.LSym) {
cmd/compile: shrink liveness maps The GC maps don't care about trailing non-pointers in args. Work harder to eliminate them. This should provide a slight speedup to everything that reads these maps, mainly GC and stack copying. The non-ptr-y runtime benchmarks happen to go from having a non-empty args map to an empty args map, so they have a significant speedup. name old time/op new time/op delta StackCopyPtr-8 80.2ms ± 4% 79.7ms ± 2% -0.63% (p=0.001 n=94+91) StackCopy-8 63.3ms ± 3% 59.2ms ± 3% -6.45% (p=0.000 n=98+97) StackCopyNoCache-8 107ms ± 3% 98ms ± 3% -8.00% (p=0.000 n=95+88) It also shrinks object files a tiny bit: name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.03% (p=0.008 n=5+5) Unicode 218kB ± 0% 218kB ± 0% -0.09% (p=0.008 n=5+5) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.03% (p=0.008 n=5+5) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.06% (p=0.008 n=5+5) SSA 15.9MB ± 0% 15.9MB ± 0% -0.06% (p=0.008 n=5+5) Flate 304kB ± 0% 303kB ± 0% -0.29% (p=0.008 n=5+5) GoParser 370kB ± 0% 370kB ± 0% +0.02% (p=0.008 n=5+5) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.07% (p=0.008 n=5+5) Tar 421kB ± 0% 421kB ± 0% -0.05% (p=0.008 n=5+5) XML 518kB ± 0% 517kB ± 0% -0.06% (p=0.008 n=5+5) [Geo mean] 934kB 933kB -0.07% Note that some object files do grow; this can happen because some maps that were duplicates of each others must be stored separately. Change-Id: Ie076891bd8e9d269ff2ff5435d5d25c721e0e31d Reviewed-on: https://go-review.googlesource.com/104175 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2018-04-02 14:21:27 -07:00
// Size args bitmaps to be just large enough to hold the largest pointer.
// First, find the largest Xoffset node we care about.
// (Nodes without pointers aren't in lv.vars; see livenessShouldTrack.)
var maxArgNode *Node
for _, n := range lv.vars {
switch n.Class() {
case PPARAM, PPARAMOUT:
if maxArgNode == nil || n.Xoffset > maxArgNode.Xoffset {
maxArgNode = n
}
}
}
// Next, find the offset of the largest pointer in the largest node.
var maxArgs int64
if maxArgNode != nil {
maxArgs = maxArgNode.Xoffset + typeptrdata(maxArgNode.Type)
}
// Size locals bitmaps to be stkptrsize sized.
// We cannot shrink them to only hold the largest pointer,
// because their size is used to calculate the beginning
// of the local variables frame.
// Further discussion in https://golang.org/cl/104175.
// TODO: consider trimming leading zeros.
// This would require shifting all bitmaps.
maxLocals := lv.stkptrsize
// Temporary symbols for encoding bitmaps.
var argsSymTmp, liveSymTmp obj.LSym
cmd/compile: shrink liveness maps The GC maps don't care about trailing non-pointers in args. Work harder to eliminate them. This should provide a slight speedup to everything that reads these maps, mainly GC and stack copying. The non-ptr-y runtime benchmarks happen to go from having a non-empty args map to an empty args map, so they have a significant speedup. name old time/op new time/op delta StackCopyPtr-8 80.2ms ± 4% 79.7ms ± 2% -0.63% (p=0.001 n=94+91) StackCopy-8 63.3ms ± 3% 59.2ms ± 3% -6.45% (p=0.000 n=98+97) StackCopyNoCache-8 107ms ± 3% 98ms ± 3% -8.00% (p=0.000 n=95+88) It also shrinks object files a tiny bit: name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.03% (p=0.008 n=5+5) Unicode 218kB ± 0% 218kB ± 0% -0.09% (p=0.008 n=5+5) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.03% (p=0.008 n=5+5) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.06% (p=0.008 n=5+5) SSA 15.9MB ± 0% 15.9MB ± 0% -0.06% (p=0.008 n=5+5) Flate 304kB ± 0% 303kB ± 0% -0.29% (p=0.008 n=5+5) GoParser 370kB ± 0% 370kB ± 0% +0.02% (p=0.008 n=5+5) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.07% (p=0.008 n=5+5) Tar 421kB ± 0% 421kB ± 0% -0.05% (p=0.008 n=5+5) XML 518kB ± 0% 517kB ± 0% -0.06% (p=0.008 n=5+5) [Geo mean] 934kB 933kB -0.07% Note that some object files do grow; this can happen because some maps that were duplicates of each others must be stored separately. Change-Id: Ie076891bd8e9d269ff2ff5435d5d25c721e0e31d Reviewed-on: https://go-review.googlesource.com/104175 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2018-04-02 14:21:27 -07:00
args := bvalloc(int32(maxArgs / int64(Widthptr)))
aoff := duint32(&argsSymTmp, 0, uint32(len(lv.stackMaps))) // number of bitmaps
aoff = duint32(&argsSymTmp, aoff, uint32(args.n)) // number of bits in each bitmap
cmd/compile: shrink liveness maps The GC maps don't care about trailing non-pointers in args. Work harder to eliminate them. This should provide a slight speedup to everything that reads these maps, mainly GC and stack copying. The non-ptr-y runtime benchmarks happen to go from having a non-empty args map to an empty args map, so they have a significant speedup. name old time/op new time/op delta StackCopyPtr-8 80.2ms ± 4% 79.7ms ± 2% -0.63% (p=0.001 n=94+91) StackCopy-8 63.3ms ± 3% 59.2ms ± 3% -6.45% (p=0.000 n=98+97) StackCopyNoCache-8 107ms ± 3% 98ms ± 3% -8.00% (p=0.000 n=95+88) It also shrinks object files a tiny bit: name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.03% (p=0.008 n=5+5) Unicode 218kB ± 0% 218kB ± 0% -0.09% (p=0.008 n=5+5) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.03% (p=0.008 n=5+5) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.06% (p=0.008 n=5+5) SSA 15.9MB ± 0% 15.9MB ± 0% -0.06% (p=0.008 n=5+5) Flate 304kB ± 0% 303kB ± 0% -0.29% (p=0.008 n=5+5) GoParser 370kB ± 0% 370kB ± 0% +0.02% (p=0.008 n=5+5) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.07% (p=0.008 n=5+5) Tar 421kB ± 0% 421kB ± 0% -0.05% (p=0.008 n=5+5) XML 518kB ± 0% 517kB ± 0% -0.06% (p=0.008 n=5+5) [Geo mean] 934kB 933kB -0.07% Note that some object files do grow; this can happen because some maps that were duplicates of each others must be stored separately. Change-Id: Ie076891bd8e9d269ff2ff5435d5d25c721e0e31d Reviewed-on: https://go-review.googlesource.com/104175 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2018-04-02 14:21:27 -07:00
locals := bvalloc(int32(maxLocals / int64(Widthptr)))
loff := duint32(&liveSymTmp, 0, uint32(len(lv.stackMaps))) // number of bitmaps
loff = duint32(&liveSymTmp, loff, uint32(locals.n)) // number of bits in each bitmap
for _, live := range lv.stackMaps {
args.Clear()
locals.Clear()
lv.pointerMap(live, lv.vars, args, locals)
aoff = dbvec(&argsSymTmp, aoff, args)
loff = dbvec(&liveSymTmp, loff, locals)
}
cmd/internal/obj: rework gclocals handling The compiler handled gcargs and gclocals LSyms unusually. It generated placeholder symbols (makefuncdatasym), filled them in, and then renamed them for content-addressability. This is an important binary size optimization; the same locals information occurs over and over. This CL continues to treat these LSyms unusually, but in a slightly more explicit way, and importantly for concurrent compilation, in a way that does not require concurrent modification of Ctxt.Hash. Instead of creating gcargs and gclocals in the usual way, by creating a types.Sym and then an obj.LSym, we add them directly to obj.FuncInfo, initialize them in obj.InitTextSym, and deduplicate and add them to ctxt.Data at the end. Then the backend's job is simply to fill them in and rename them appropriately. Updates #15756 name old alloc/op new alloc/op delta Template 38.8MB ± 0% 38.7MB ± 0% -0.22% (p=0.016 n=5+5) Unicode 29.8MB ± 0% 29.8MB ± 0% ~ (p=0.690 n=5+5) GoTypes 113MB ± 0% 113MB ± 0% -0.24% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.24GB ± 0% -0.39% (p=0.008 n=5+5) Flate 25.3MB ± 0% 25.2MB ± 0% -0.43% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 31.7MB ± 0% -0.22% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 77.6MB ± 0% -0.80% (p=0.008 n=5+5) Tar 26.6MB ± 0% 26.3MB ± 0% -0.85% (p=0.008 n=5+5) XML 42.4MB ± 0% 41.9MB ± 0% -1.04% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 378k ± 0% 377k ± 1% ~ (p=0.151 n=5+5) Unicode 321k ± 1% 321k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% -0.47% (p=0.016 n=5+5) SSA 9.71M ± 0% 9.67M ± 0% -0.33% (p=0.008 n=5+5) Flate 233k ± 1% 232k ± 1% ~ (p=0.151 n=5+5) GoParser 316k ± 0% 315k ± 0% -0.49% (p=0.016 n=5+5) Reflect 979k ± 0% 972k ± 0% -0.75% (p=0.008 n=5+5) Tar 250k ± 0% 247k ± 1% -0.92% (p=0.008 n=5+5) XML 392k ± 1% 389k ± 0% -0.67% (p=0.008 n=5+5) Change-Id: Idc36186ca9d2f8214b5f7720bbc27b6bb22fdc48 Reviewed-on: https://go-review.googlesource.com/40697 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> 
TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-14 06:35:53 -07:00
// Give these LSyms content-addressable names,
// so that they can be de-duplicated.
// This provides significant binary size savings.
//
// These symbols will be added to Ctxt.Data by addGCLocals
// after parallel compilation is done.
makeSym := func(tmpSym *obj.LSym) *obj.LSym {
return Ctxt.LookupInit(fmt.Sprintf("gclocals·%x", md5.Sum(tmpSym.P)), func(lsym *obj.LSym) {
lsym.P = tmpSym.P
lsym.Set(obj.AttrContentAddressable, true)
})
}
return makeSym(&argsSymTmp), makeSym(&liveSymTmp)
}
// Entry point for liveness analysis. Solves for the liveness of
// pointer variables in the function and emits a runtime data
// structure read by the garbage collector.
// Returns a map from GC safe points to their corresponding stack map index.
func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
// Construct the global liveness state.
cmd/compile: use a map to track liveness variable indices It is not safe to modify Node.Opt in the backend. Instead of using Node.Opt to store liveness variable indices, use a map. This simplifies the code and makes it much more clearly race-free. There are generally few such variables, so the maps are not a significant source of allocations; this also remove some allocations from putting int32s into interfaces. Because map lookups are more expensive than interface value extraction, reorder valueEffects to do the map lookup last. The only remaining use of Node.Opt is now in esc.go. Passes toolstash-check. Fixes #20144 name old alloc/op new alloc/op delta Template 37.8MB ± 0% 37.9MB ± 0% ~ (p=0.548 n=5+5) Unicode 28.9MB ± 0% 28.9MB ± 0% ~ (p=0.548 n=5+5) GoTypes 110MB ± 0% 110MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 461MB ± 0% 462MB ± 0% +0.08% (p=0.008 n=5+5) SSA 1.11GB ± 0% 1.11GB ± 0% +0.11% (p=0.008 n=5+5) Flate 24.7MB ± 0% 24.7MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.1MB ± 0% 31.1MB ± 0% ~ (p=0.841 n=5+5) Reflect 73.7MB ± 0% 73.8MB ± 0% +0.23% (p=0.008 n=5+5) Tar 25.8MB ± 0% 25.7MB ± 0% ~ (p=0.690 n=5+5) XML 41.2MB ± 0% 41.2MB ± 0% ~ (p=0.841 n=5+5) [Geo mean] 71.9MB 71.9MB +0.06% name old allocs/op new allocs/op delta Template 385k ± 0% 384k ± 0% ~ (p=0.548 n=5+5) Unicode 344k ± 0% 343k ± 1% ~ (p=0.421 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.690 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% ~ (p=0.095 n=5+5) SSA 9.86M ± 0% 9.84M ± 0% -0.19% (p=0.008 n=5+5) Flate 238k ± 0% 238k ± 0% ~ (p=1.000 n=5+5) GoParser 321k ± 0% 320k ± 0% ~ (p=0.310 n=5+5) Reflect 956k ± 0% 956k ± 0% ~ (p=1.000 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.056 n=5+5) XML 402k ± 1% 400k ± 1% -0.57% (p=0.032 n=5+5) [Geo mean] 740k 739k -0.19% Change-Id: Id5916c9def76add272e89c59fe10968f0a6bb01d Reviewed-on: https://go-review.googlesource.com/42135 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-27 16:27:47 -07:00
vars, idx := getvariables(e.curfn)
lv := newliveness(e.curfn, f, vars, idx, e.stkptrsize)
// Run the dataflow framework.
lv.prologue()
lv.solve()
lv.epilogue()
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
if debuglive > 0 {
lv.showlive(nil, lv.stackMaps[0])
cmd/compile: make LivenessMap dense Currently liveness information is kept in a map keyed by *ssa.Value. This made sense when liveness information was sparse, but now we have liveness for nearly every ssa.Value. There's a fair amount of memory and CPU overhead to this map now. This CL replaces this map with a slice indexed by value ID. Passes toolstash -cmp. name old time/op new time/op delta Template 197ms ± 1% 194ms ± 1% -1.60% (p=0.000 n=9+10) Unicode 100ms ± 2% 99ms ± 1% -1.31% (p=0.012 n=8+10) GoTypes 695ms ± 1% 689ms ± 0% -0.94% (p=0.000 n=10+10) Compiler 3.34s ± 2% 3.29s ± 1% -1.26% (p=0.000 n=10+9) SSA 8.08s ± 0% 8.02s ± 2% -0.70% (p=0.034 n=8+10) Flate 133ms ± 1% 131ms ± 1% -1.04% (p=0.006 n=10+9) GoParser 163ms ± 1% 162ms ± 1% -0.79% (p=0.034 n=8+10) Reflect 459ms ± 1% 454ms ± 0% -1.06% (p=0.000 n=10+8) Tar 186ms ± 1% 185ms ± 1% -0.87% (p=0.003 n=9+9) XML 238ms ± 1% 235ms ± 1% -1.01% (p=0.004 n=8+9) [Geo mean] 418ms 414ms -1.06% name old alloc/op new alloc/op delta Template 36.4MB ± 0% 35.6MB ± 0% -2.29% (p=0.000 n=9+10) Unicode 29.7MB ± 0% 29.5MB ± 0% -0.68% (p=0.000 n=10+10) GoTypes 119MB ± 0% 117MB ± 0% -2.30% (p=0.000 n=9+9) Compiler 546MB ± 0% 532MB ± 0% -2.47% (p=0.000 n=10+10) SSA 1.59GB ± 0% 1.55GB ± 0% -2.41% (p=0.000 n=10+10) Flate 24.9MB ± 0% 24.5MB ± 0% -1.77% (p=0.000 n=8+10) GoParser 29.5MB ± 0% 28.7MB ± 0% -2.60% (p=0.000 n=9+10) Reflect 81.7MB ± 0% 80.5MB ± 0% -1.49% (p=0.000 n=10+10) Tar 35.7MB ± 0% 35.1MB ± 0% -1.64% (p=0.000 n=10+10) XML 45.0MB ± 0% 43.7MB ± 0% -2.76% (p=0.000 n=9+10) [Geo mean] 80.1MB 78.4MB -2.04% name old allocs/op new allocs/op delta Template 336k ± 0% 335k ± 0% -0.31% (p=0.000 n=9+10) Unicode 339k ± 0% 339k ± 0% -0.05% (p=0.000 n=10+10) GoTypes 1.18M ± 0% 1.18M ± 0% -0.26% (p=0.000 n=10+10) Compiler 4.96M ± 0% 4.94M ± 0% -0.24% (p=0.000 n=10+10) SSA 12.6M ± 0% 12.5M ± 0% -0.30% (p=0.000 n=10+10) Flate 224k ± 0% 223k ± 0% -0.30% (p=0.000 n=10+10) GoParser 282k ± 0% 281k ± 0% -0.32% (p=0.000 n=10+10) Reflect 965k ± 
0% 963k ± 0% -0.27% (p=0.000 n=9+10) Tar 331k ± 0% 330k ± 0% -0.27% (p=0.000 n=10+10) XML 393k ± 0% 392k ± 0% -0.26% (p=0.000 n=10+10) [Geo mean] 763k 761k -0.26% Updates #24543. Change-Id: I4cfd2461510d3c026a262760bca225dc37482341 Reviewed-on: https://go-review.googlesource.com/110178 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-21 15:40:56 -04:00
for _, b := range f.Blocks {
for _, val := range b.Values {
cmd/compile: fix unsafe-points with stack maps The compiler currently conflates whether a Value has a stack map with whether it's an unsafe point. For the most part, unsafe-points don't have stack maps, so this is mostly fine, but call instructions can be both an unsafe-point *and* have a stack map. For example, none of the instructions in a nosplit function should be preemptible, but calls must still have stack maps in case the called function grows the stack or get preempted. Currently, the compiler can't distinguish this case, so calls in nosplit functions are marked as safe-points just because they have stack maps. This is particularly problematic if a nosplit function calls another nosplit function, since this can introduce a preemption point where there should be none. We realized this was a problem for split-stack prologues a while back, and CL 207349 changed the encoding of unsafe-points to use the register map index instead of the stack map index so we could record both a stack map and an unsafe-point at the same instruction. But this was never extended into the compiler. This CL fixes this problem in the compiler. We make LivenessIndex slightly more abstract by separating unsafe-point marks from stack and register map indexes. We map this to the PCDATA encoding later when producing Progs. This isn't enough to fix the whole problem for nosplit functions, because obj still adds prologues and marks those as preemptible, but it's a step in the right direction. I checked this CL by comparing maps before and after this change in the runtime and net/http. In net/http, unsafe-points match exactly; at anything that isn't an unsafe-point, both the stack and register maps are unchanged by this CL. In the runtime, at every point that was a safe-point before this change, the stack maps agree (and mostly the runtime doesn't have register maps at all now). In both, all CALLs (except write barrier calls) have stack maps. For #36365. 
Change-Id: I066628938b02e78be5c81a6614295bcf7cc566c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/230541 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-21 14:23:04 -04:00
if idx := lv.livenessMap.Get(val); idx.StackMapValid() {
cmd/compile: make LivenessMap dense Currently liveness information is kept in a map keyed by *ssa.Value. This made sense when liveness information was sparse, but now we have liveness for nearly every ssa.Value. There's a fair amount of memory and CPU overhead to this map now. This CL replaces this map with a slice indexed by value ID. Passes toolstash -cmp. name old time/op new time/op delta Template 197ms ± 1% 194ms ± 1% -1.60% (p=0.000 n=9+10) Unicode 100ms ± 2% 99ms ± 1% -1.31% (p=0.012 n=8+10) GoTypes 695ms ± 1% 689ms ± 0% -0.94% (p=0.000 n=10+10) Compiler 3.34s ± 2% 3.29s ± 1% -1.26% (p=0.000 n=10+9) SSA 8.08s ± 0% 8.02s ± 2% -0.70% (p=0.034 n=8+10) Flate 133ms ± 1% 131ms ± 1% -1.04% (p=0.006 n=10+9) GoParser 163ms ± 1% 162ms ± 1% -0.79% (p=0.034 n=8+10) Reflect 459ms ± 1% 454ms ± 0% -1.06% (p=0.000 n=10+8) Tar 186ms ± 1% 185ms ± 1% -0.87% (p=0.003 n=9+9) XML 238ms ± 1% 235ms ± 1% -1.01% (p=0.004 n=8+9) [Geo mean] 418ms 414ms -1.06% name old alloc/op new alloc/op delta Template 36.4MB ± 0% 35.6MB ± 0% -2.29% (p=0.000 n=9+10) Unicode 29.7MB ± 0% 29.5MB ± 0% -0.68% (p=0.000 n=10+10) GoTypes 119MB ± 0% 117MB ± 0% -2.30% (p=0.000 n=9+9) Compiler 546MB ± 0% 532MB ± 0% -2.47% (p=0.000 n=10+10) SSA 1.59GB ± 0% 1.55GB ± 0% -2.41% (p=0.000 n=10+10) Flate 24.9MB ± 0% 24.5MB ± 0% -1.77% (p=0.000 n=8+10) GoParser 29.5MB ± 0% 28.7MB ± 0% -2.60% (p=0.000 n=9+10) Reflect 81.7MB ± 0% 80.5MB ± 0% -1.49% (p=0.000 n=10+10) Tar 35.7MB ± 0% 35.1MB ± 0% -1.64% (p=0.000 n=10+10) XML 45.0MB ± 0% 43.7MB ± 0% -2.76% (p=0.000 n=9+10) [Geo mean] 80.1MB 78.4MB -2.04% name old allocs/op new allocs/op delta Template 336k ± 0% 335k ± 0% -0.31% (p=0.000 n=9+10) Unicode 339k ± 0% 339k ± 0% -0.05% (p=0.000 n=10+10) GoTypes 1.18M ± 0% 1.18M ± 0% -0.26% (p=0.000 n=10+10) Compiler 4.96M ± 0% 4.94M ± 0% -0.24% (p=0.000 n=10+10) SSA 12.6M ± 0% 12.5M ± 0% -0.30% (p=0.000 n=10+10) Flate 224k ± 0% 223k ± 0% -0.30% (p=0.000 n=10+10) GoParser 282k ± 0% 281k ± 0% -0.32% (p=0.000 n=10+10) Reflect 965k ± 
0% 963k ± 0% -0.27% (p=0.000 n=9+10) Tar 331k ± 0% 330k ± 0% -0.27% (p=0.000 n=10+10) XML 393k ± 0% 392k ± 0% -0.26% (p=0.000 n=10+10) [Geo mean] 763k 761k -0.26% Updates #24543. Change-Id: I4cfd2461510d3c026a262760bca225dc37482341 Reviewed-on: https://go-review.googlesource.com/110178 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-21 15:40:56 -04:00
lv.showlive(val, lv.stackMaps[idx.stackMapIndex])
}
}
cmd/compile: incrementally compact liveness maps The per-Value slice of liveness maps is currently one of the largest sources of allocation in the compiler. On cmd/compile/internal/ssa, it's 5% of overall allocation, or 75MB in total. Enabling liveness maps everywhere significantly increased this allocation footprint, which in turn slowed down the compiler. Improve this by compacting the liveness maps after every block is processed. There are typically very few distinct liveness maps, so compacting the maps after every block, rather than at the end of the function, can significantly reduce these allocations. Passes toolstash -cmp. name old time/op new time/op delta Template 198ms ± 2% 196ms ± 1% -1.11% (p=0.008 n=9+10) Unicode 100ms ± 1% 99ms ± 1% -0.94% (p=0.015 n=8+9) GoTypes 703ms ± 2% 695ms ± 1% -1.15% (p=0.000 n=10+10) Compiler 3.38s ± 3% 3.33s ± 0% -1.66% (p=0.000 n=10+9) SSA 7.96s ± 1% 7.93s ± 1% ~ (p=0.113 n=9+10) Flate 134ms ± 1% 132ms ± 1% -1.30% (p=0.000 n=8+10) GoParser 165ms ± 2% 163ms ± 1% -1.32% (p=0.013 n=9+10) Reflect 462ms ± 2% 459ms ± 0% -0.65% (p=0.036 n=9+8) Tar 188ms ± 2% 186ms ± 1% ~ (p=0.173 n=8+10) XML 243ms ± 7% 239ms ± 1% ~ (p=0.684 n=10+10) [Geo mean] 421ms 416ms -1.10% name old alloc/op new alloc/op delta Template 38.0MB ± 0% 36.5MB ± 0% -3.98% (p=0.000 n=10+10) Unicode 30.3MB ± 0% 29.6MB ± 0% -2.21% (p=0.000 n=10+10) GoTypes 125MB ± 0% 120MB ± 0% -4.51% (p=0.000 n=10+9) Compiler 575MB ± 0% 546MB ± 0% -5.06% (p=0.000 n=10+10) SSA 1.64GB ± 0% 1.55GB ± 0% -4.97% (p=0.000 n=10+10) Flate 25.9MB ± 0% 25.0MB ± 0% -3.41% (p=0.000 n=10+10) GoParser 30.7MB ± 0% 29.5MB ± 0% -3.97% (p=0.000 n=10+10) Reflect 84.1MB ± 0% 81.9MB ± 0% -2.64% (p=0.000 n=10+10) Tar 37.0MB ± 0% 35.8MB ± 0% -3.27% (p=0.000 n=10+9) XML 47.2MB ± 0% 45.0MB ± 0% -4.57% (p=0.000 n=10+10) [Geo mean] 83.2MB 79.9MB -3.86% name old allocs/op new allocs/op delta Template 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.01% (p=0.014 n=10+10) GoTypes 1.18M 
± 0% 1.18M ± 0% -0.04% (p=0.000 n=10+10) Compiler 4.97M ± 0% 4.97M ± 0% -0.03% (p=0.000 n=10+10) SSA 12.3M ± 0% 12.3M ± 0% -0.01% (p=0.000 n=10+10) Flate 226k ± 0% 225k ± 0% -0.09% (p=0.000 n=10+10) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.000 n=10+9) Reflect 972k ± 0% 971k ± 0% -0.04% (p=0.000 n=10+8) Tar 333k ± 0% 332k ± 0% -0.05% (p=0.000 n=10+9) XML 395k ± 0% 395k ± 0% -0.04% (p=0.000 n=10+10) [Geo mean] 764k 764k -0.04% Updates #24543. Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b Reviewed-on: https://go-review.googlesource.com/110177 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-04-20 15:48:46 -04:00
}
}
if debuglive >= 2 {
lv.printDebug()
}
cmd/compile: reuse liveness structures Currently liveness analysis is a significant source of allocations in the compiler. This CL mitigates this by moving the main sources of allocation to the ssa.Cache, allowing them to be reused between different liveness runs. Passes toolstash -cmp. name old time/op new time/op delta Template 194ms ± 1% 193ms ± 1% ~ (p=0.156 n=10+9) Unicode 99.1ms ± 1% 99.3ms ± 2% ~ (p=0.853 n=10+10) GoTypes 689ms ± 0% 687ms ± 0% -0.27% (p=0.022 n=10+9) Compiler 3.29s ± 1% 3.30s ± 1% ~ (p=0.489 n=9+9) SSA 8.02s ± 2% 7.97s ± 1% -0.71% (p=0.011 n=10+10) Flate 131ms ± 1% 130ms ± 1% -0.59% (p=0.043 n=9+10) GoParser 162ms ± 1% 160ms ± 1% -1.53% (p=0.000 n=10+10) Reflect 454ms ± 0% 454ms ± 0% ~ (p=0.959 n=8+8) Tar 185ms ± 1% 185ms ± 2% ~ (p=0.905 n=9+10) XML 235ms ± 1% 232ms ± 1% -1.15% (p=0.001 n=9+10) [Geo mean] 414ms 412ms -0.39% name old alloc/op new alloc/op delta Template 35.6MB ± 0% 34.2MB ± 0% -3.75% (p=0.000 n=10+10) Unicode 29.5MB ± 0% 29.4MB ± 0% -0.26% (p=0.000 n=10+9) GoTypes 117MB ± 0% 112MB ± 0% -3.78% (p=0.000 n=9+10) Compiler 532MB ± 0% 512MB ± 0% -3.80% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.48GB ± 0% -4.82% (p=0.000 n=10+10) Flate 24.5MB ± 0% 23.6MB ± 0% -3.61% (p=0.000 n=10+9) GoParser 28.7MB ± 0% 27.7MB ± 0% -3.43% (p=0.000 n=10+10) Reflect 80.5MB ± 0% 78.1MB ± 0% -2.96% (p=0.000 n=10+10) Tar 35.1MB ± 0% 33.9MB ± 0% -3.49% (p=0.000 n=10+10) XML 43.7MB ± 0% 42.4MB ± 0% -3.05% (p=0.000 n=10+10) [Geo mean] 78.4MB 75.8MB -3.30% name old allocs/op new allocs/op delta Template 335k ± 0% 335k ± 0% -0.12% (p=0.000 n=10+10) Unicode 339k ± 0% 339k ± 0% -0.01% (p=0.001 n=10+10) GoTypes 1.18M ± 0% 1.17M ± 0% -0.12% (p=0.000 n=10+10) Compiler 4.94M ± 0% 4.94M ± 0% -0.06% (p=0.000 n=10+10) SSA 12.5M ± 0% 12.5M ± 0% -0.07% (p=0.000 n=10+10) Flate 223k ± 0% 223k ± 0% -0.11% (p=0.000 n=10+10) GoParser 281k ± 0% 281k ± 0% -0.08% (p=0.000 n=10+10) Reflect 963k ± 0% 960k ± 0% -0.23% (p=0.000 n=10+9) Tar 330k ± 0% 330k ± 0% -0.12% (p=0.000 n=10+10) XML 
392k ± 0% 392k ± 0% -0.08% (p=0.000 n=10+10) [Geo mean] 761k 760k -0.10% Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps, plus these optimizations, is: name old time/op new time/op delta Template 186ms ± 1% 194ms ± 1% +4.41% (p=0.000 n=9+10) Unicode 96.5ms ± 1% 99.1ms ± 1% +2.76% (p=0.000 n=9+10) GoTypes 659ms ± 1% 689ms ± 0% +4.56% (p=0.000 n=9+10) Compiler 3.14s ± 2% 3.29s ± 1% +4.95% (p=0.000 n=9+9) SSA 7.68s ± 3% 8.02s ± 2% +4.41% (p=0.000 n=10+10) Flate 126ms ± 0% 131ms ± 1% +4.14% (p=0.000 n=10+9) GoParser 153ms ± 1% 162ms ± 1% +5.90% (p=0.000 n=10+10) Reflect 436ms ± 1% 454ms ± 0% +4.14% (p=0.000 n=10+8) Tar 177ms ± 1% 185ms ± 1% +4.28% (p=0.000 n=8+9) XML 224ms ± 1% 235ms ± 1% +5.23% (p=0.000 n=10+9) [Geo mean] 396ms 414ms +4.47% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 35.6MB ± 0% +3.24% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.5MB ± 0% +0.51% (p=0.000 n=9+10) GoTypes 113MB ± 0% 117MB ± 0% +3.31% (p=0.000 n=8+9) Compiler 509MB ± 0% 532MB ± 0% +4.46% (p=0.000 n=10+10) SSA 1.49GB ± 0% 1.55GB ± 0% +4.10% (p=0.000 n=10+10) Flate 23.8MB ± 0% 24.5MB ± 0% +2.92% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 28.7MB ± 0% +2.88% (p=0.000 n=10+10) Reflect 77.4MB ± 0% 80.5MB ± 0% +4.01% (p=0.000 n=10+10) Tar 34.1MB ± 0% 35.1MB ± 0% +3.12% (p=0.000 n=10+10) XML 42.6MB ± 0% 43.7MB ± 0% +2.65% (p=0.000 n=10+10) [Geo mean] 76.1MB 78.4MB +3.11% name old allocs/op new allocs/op delta Template 320k ± 0% 335k ± 0% +4.60% (p=0.000 n=10+10) Unicode 336k ± 0% 339k ± 0% +0.96% (p=0.000 n=9+10) GoTypes 1.12M ± 0% 1.18M ± 0% +4.55% (p=0.000 n=10+10) Compiler 4.66M ± 0% 4.94M ± 0% +6.18% (p=0.000 n=10+10) SSA 11.9M ± 0% 12.5M ± 0% +5.37% (p=0.000 n=10+10) Flate 214k ± 0% 223k ± 0% +4.15% (p=0.000 n=9+10) GoParser 270k ± 0% 281k ± 0% +4.15% (p=0.000 n=10+10) Reflect 921k ± 0% 963k ± 0% +4.49% (p=0.000 n=10+10) Tar 317k ± 0% 330k ± 0% +4.25% (p=0.000 n=10+10) XML 375k ± 
0% 392k ± 0% +4.75% (p=0.000 n=10+10) [Geo mean] 729k 761k +4.34% Updates #24543. Change-Id: Ia951fdb3c17ae1c156e1d05fc42e69caba33c91a Reviewed-on: https://go-review.googlesource.com/110179 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2018-04-21 16:15:41 -04:00
// Update the function cache.
{
cache := f.Cache.Liveness.(*livenessFuncCache)
if cap(lv.be) < 2000 { // Threshold from ssa.Cache slices.
for i := range lv.be {
lv.be[i] = BlockEffects{}
}
cache.be = lv.be
}
if len(lv.livenessMap.vals) < 2000 {
cmd/compile: reuse liveness structures Currently liveness analysis is a significant source of allocations in the compiler. This CL mitigates this by moving the main sources of allocation to the ssa.Cache, allowing them to be reused between different liveness runs. Passes toolstash -cmp. name old time/op new time/op delta Template 194ms ± 1% 193ms ± 1% ~ (p=0.156 n=10+9) Unicode 99.1ms ± 1% 99.3ms ± 2% ~ (p=0.853 n=10+10) GoTypes 689ms ± 0% 687ms ± 0% -0.27% (p=0.022 n=10+9) Compiler 3.29s ± 1% 3.30s ± 1% ~ (p=0.489 n=9+9) SSA 8.02s ± 2% 7.97s ± 1% -0.71% (p=0.011 n=10+10) Flate 131ms ± 1% 130ms ± 1% -0.59% (p=0.043 n=9+10) GoParser 162ms ± 1% 160ms ± 1% -1.53% (p=0.000 n=10+10) Reflect 454ms ± 0% 454ms ± 0% ~ (p=0.959 n=8+8) Tar 185ms ± 1% 185ms ± 2% ~ (p=0.905 n=9+10) XML 235ms ± 1% 232ms ± 1% -1.15% (p=0.001 n=9+10) [Geo mean] 414ms 412ms -0.39% name old alloc/op new alloc/op delta Template 35.6MB ± 0% 34.2MB ± 0% -3.75% (p=0.000 n=10+10) Unicode 29.5MB ± 0% 29.4MB ± 0% -0.26% (p=0.000 n=10+9) GoTypes 117MB ± 0% 112MB ± 0% -3.78% (p=0.000 n=9+10) Compiler 532MB ± 0% 512MB ± 0% -3.80% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.48GB ± 0% -4.82% (p=0.000 n=10+10) Flate 24.5MB ± 0% 23.6MB ± 0% -3.61% (p=0.000 n=10+9) GoParser 28.7MB ± 0% 27.7MB ± 0% -3.43% (p=0.000 n=10+10) Reflect 80.5MB ± 0% 78.1MB ± 0% -2.96% (p=0.000 n=10+10) Tar 35.1MB ± 0% 33.9MB ± 0% -3.49% (p=0.000 n=10+10) XML 43.7MB ± 0% 42.4MB ± 0% -3.05% (p=0.000 n=10+10) [Geo mean] 78.4MB 75.8MB -3.30% name old allocs/op new allocs/op delta Template 335k ± 0% 335k ± 0% -0.12% (p=0.000 n=10+10) Unicode 339k ± 0% 339k ± 0% -0.01% (p=0.001 n=10+10) GoTypes 1.18M ± 0% 1.17M ± 0% -0.12% (p=0.000 n=10+10) Compiler 4.94M ± 0% 4.94M ± 0% -0.06% (p=0.000 n=10+10) SSA 12.5M ± 0% 12.5M ± 0% -0.07% (p=0.000 n=10+10) Flate 223k ± 0% 223k ± 0% -0.11% (p=0.000 n=10+10) GoParser 281k ± 0% 281k ± 0% -0.08% (p=0.000 n=10+10) Reflect 963k ± 0% 960k ± 0% -0.23% (p=0.000 n=10+9) Tar 330k ± 0% 330k ± 0% -0.12% (p=0.000 n=10+10) XML 
392k ± 0% 392k ± 0% -0.08% (p=0.000 n=10+10) [Geo mean] 761k 760k -0.10% Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps, plus these optimizations, is: name old time/op new time/op delta Template 186ms ± 1% 194ms ± 1% +4.41% (p=0.000 n=9+10) Unicode 96.5ms ± 1% 99.1ms ± 1% +2.76% (p=0.000 n=9+10) GoTypes 659ms ± 1% 689ms ± 0% +4.56% (p=0.000 n=9+10) Compiler 3.14s ± 2% 3.29s ± 1% +4.95% (p=0.000 n=9+9) SSA 7.68s ± 3% 8.02s ± 2% +4.41% (p=0.000 n=10+10) Flate 126ms ± 0% 131ms ± 1% +4.14% (p=0.000 n=10+9) GoParser 153ms ± 1% 162ms ± 1% +5.90% (p=0.000 n=10+10) Reflect 436ms ± 1% 454ms ± 0% +4.14% (p=0.000 n=10+8) Tar 177ms ± 1% 185ms ± 1% +4.28% (p=0.000 n=8+9) XML 224ms ± 1% 235ms ± 1% +5.23% (p=0.000 n=10+9) [Geo mean] 396ms 414ms +4.47% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 35.6MB ± 0% +3.24% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.5MB ± 0% +0.51% (p=0.000 n=9+10) GoTypes 113MB ± 0% 117MB ± 0% +3.31% (p=0.000 n=8+9) Compiler 509MB ± 0% 532MB ± 0% +4.46% (p=0.000 n=10+10) SSA 1.49GB ± 0% 1.55GB ± 0% +4.10% (p=0.000 n=10+10) Flate 23.8MB ± 0% 24.5MB ± 0% +2.92% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 28.7MB ± 0% +2.88% (p=0.000 n=10+10) Reflect 77.4MB ± 0% 80.5MB ± 0% +4.01% (p=0.000 n=10+10) Tar 34.1MB ± 0% 35.1MB ± 0% +3.12% (p=0.000 n=10+10) XML 42.6MB ± 0% 43.7MB ± 0% +2.65% (p=0.000 n=10+10) [Geo mean] 76.1MB 78.4MB +3.11% name old allocs/op new allocs/op delta Template 320k ± 0% 335k ± 0% +4.60% (p=0.000 n=10+10) Unicode 336k ± 0% 339k ± 0% +0.96% (p=0.000 n=9+10) GoTypes 1.12M ± 0% 1.18M ± 0% +4.55% (p=0.000 n=10+10) Compiler 4.66M ± 0% 4.94M ± 0% +6.18% (p=0.000 n=10+10) SSA 11.9M ± 0% 12.5M ± 0% +5.37% (p=0.000 n=10+10) Flate 214k ± 0% 223k ± 0% +4.15% (p=0.000 n=9+10) GoParser 270k ± 0% 281k ± 0% +4.15% (p=0.000 n=10+10) Reflect 921k ± 0% 963k ± 0% +4.49% (p=0.000 n=10+10) Tar 317k ± 0% 330k ± 0% +4.25% (p=0.000 n=10+10) XML 375k ± 
0% 392k ± 0% +4.75% (p=0.000 n=10+10) [Geo mean] 729k 761k +4.34% Updates #24543. Change-Id: Ia951fdb3c17ae1c156e1d05fc42e69caba33c91a Reviewed-on: https://go-review.googlesource.com/110179 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
2018-04-21 16:15:41 -04:00
cache.livenessMap = lv.livenessMap
}
}
// Emit the live pointer map data structures
ls := e.curfn.Func.lsym
fninfo := ls.Func()
fninfo.GCArgs, fninfo.GCLocals = lv.emit()
p := pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = fninfo.GCArgs
p = pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_LocalsPointerMaps)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = fninfo.GCLocals
return lv.livenessMap
}
// isfat reports whether a variable of type t needs multiple assignments
// to initialize. For example, given:
//
//	type T struct { x, y int }
//	t := T{x: 0, y: 1}
//
// the variable t is fully initialized only after:
//
//	var t T
//	t.x = 0
//	t.y = 1
//
// Slices, strings, and interfaces always report true. An array with exactly
// one element, or a struct with exactly one field, is as fat as that single
// element or field; arrays and structs of any other size report true.
// A nil type and every other kind of type report false.
func isfat(t *types.Type) bool {
	// Peel off single-element arrays and single-field structs until a
	// type that decides the answer on its own is reached.
	for t != nil {
		switch t.Etype {
		case TSLICE, TSTRING, TINTER: // TINTER: maybe remove later
			return true
		case TARRAY:
			if t.NumElem() != 1 {
				return true
			}
			// Exactly one element: the answer is that of the element type.
			t = t.Elem()
		case TSTRUCT:
			if t.NumFields() != 1 {
				return true
			}
			// Exactly one field: the answer is that of the field type.
			t = t.Field(0).Type
		default:
			return false
		}
	}
	return false
}