go/src/cmd/compile/internal/gc/inl.go

1228 lines
32 KiB
Go
Raw Normal View History

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// The inlining facility makes 2 passes: first caninl determines which
// functions are suitable for inlining, and for those that are it
// saves a copy of the body. Then inlcalls walks each function body to
// expand calls to inlinable functions.
//
// The debug['l'] flag controls the aggressiveness. Note that main() swaps level 0 and 1,
// making 1 the default and -l disable. Additional levels (beyond -l) may be buggy and
// are not supported.
// 0: disabled
// 1: 80-nodes leaf functions, oneliners, lazy typechecking (default)
// 2: (unassigned)
// 3: (unassigned)
// 4: allow non-leaf functions
//
// At some point this may get another default and become switch-offable with -N.
//
// The -d typcheckinl flag enables early typechecking of all imported bodies,
// which is useful to flush out bugs.
//
// The debug['m'] flag enables diagnostic output. a single -m is useful for verifying
// which calls get inlined or not, more is for debugging, and may go away at any point.
package gc
import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/src"
"fmt"
"strings"
)
// Get the function's package. For ordinary functions it's on the ->sym, but for imported methods
// the ->sym can be re-used in the local package, so peel it off the receiver's type.
func fnpkg(fn *Node) *types.Pkg {
if fn.IsMethod() {
// method
rcvr := fn.Type.Recv().Type
if rcvr.IsPtr() {
rcvr = rcvr.Elem()
}
if rcvr.Sym == nil {
Fatalf("receiver with no sym: [%v] %L (%v)", fn.Sym, fn, rcvr)
}
return rcvr.Sym.Pkg
}
// non-method
return fn.Sym.Pkg
}
// Lazy typechecking of imported bodies. For local functions, caninl will set ->typecheck
// because they're a copy of an already checked body.
func typecheckinl(fn *Node) {
lno := setlineno(fn)
// typecheckinl is only for imported functions;
// their bodies may refer to unsafe as long as the package
// was marked safe during import (which was checked then).
// the ->inl of a local function has been typechecked before caninl copied it.
pkg := fnpkg(fn)
if pkg == localpkg || pkg == nil {
return // typecheckinl on local function
}
cmd/compile: export inlined function bodies Completed implementation for exporting inlined functions using the new binary export format. This change passes (export GO_GCFLAGS=-newexport; make all.bash) but for gc's builtin_test.go which we need to adjust before enabling this code by default. For a high-level description of the export format see the comment at the top of bexport.go. Major changes: 1) The export format for the platform independent export data changed: When we export inlined function bodies, additional objects (other functions, types, etc.) that are referred to by the function bodies will need to be exported. While this doesn't affect the platform-independent portion directly, it adds more objects to the exportlist while we are exporting. Instead of trying to sort the objects into groups, just export objects as they appear in the export list. This is slightly less compact (one extra byte per object), but it is simpler and much more flexible. 2) The export format contains now three sections: 1) The plat- form independent objects, 2) the objects pulled in for export via inlined function bodies, and 3) the inlined function bodies. 3) Completed the exporting and importing code for inlined function bodies. The format is completely compiler-specific and easily changeable w/o affecting other tools. There is still quite a bit of room for denser encoding. This can happen at any time in the future. This change contains also the adjustments for go/internal/gcimporter, necessary because of the export format change 1) mentioned above. For #13241. Change-Id: I86bca0bd984b12ccf13d0d30892e6e25f6d04ed5 Reviewed-on: https://go-review.googlesource.com/21172 Run-TryBot: Robert Griesemer <gri@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-03-18 17:21:32 -07:00
if Debug['m'] > 2 || Debug_export != 0 {
fmt.Printf("typecheck import [%v] %L { %#v }\n", fn.Sym, fn, asNodes(fn.Func.Inl.Body))
}
save_safemode := safemode
safemode = false
savefn := Curfn
Curfn = fn
typecheckslice(fn.Func.Inl.Body, Etop)
Curfn = savefn
// During typechecking, declarations are added to
// Curfn.Func.Dcl. Move them to Inl.Dcl for consistency with
// how local functions behave. (Append because typecheckinl
// may be called multiple times.)
fn.Func.Inl.Dcl = append(fn.Func.Inl.Dcl, fn.Func.Dcl...)
fn.Func.Dcl = nil
safemode = save_safemode
lineno = lno
}
// Caninl determines whether fn is inlineable.
// If so, caninl saves fn->nbody in fn->inl and substitutes it with a copy.
// fn and ->nbody will already have been typechecked.
func caninl(fn *Node) {
if fn.Op != ODCLFUNC {
Fatalf("caninl %v", fn)
}
if fn.Func.Nname == nil {
Fatalf("caninl no nname %+v", fn)
}
var reason string // reason, if any, that the function was not inlined
if Debug['m'] > 1 {
defer func() {
if reason != "" {
fmt.Printf("%v: cannot inline %v: %s\n", fn.Line(), fn.Func.Nname, reason)
}
}()
}
// If marked "go:noinline", don't inline
if fn.Func.Pragma&Noinline != 0 {
reason = "marked go:noinline"
return
}
// If marked "go:cgo_unsafe_args", don't inline, since the
// function makes assumptions about its argument frame layout.
if fn.Func.Pragma&CgoUnsafeArgs != 0 {
reason = "marked go:cgo_unsafe_args"
return
}
// The nowritebarrierrec checker currently works at function
// granularity, so inlining yeswritebarrierrec functions can
// confuse it (#22342). As a workaround, disallow inlining
// them for now.
if fn.Func.Pragma&Yeswritebarrierrec != 0 {
reason = "marked go:yeswritebarrierrec"
return
}
// If fn has no body (is defined outside of Go), cannot inline it.
if fn.Nbody.Len() == 0 {
reason = "no function body"
return
}
if fn.Typecheck() == 0 {
Fatalf("caninl on non-typechecked function %v", fn)
}
n := fn.Func.Nname
if n.Func.InlinabilityChecked() {
return
}
defer n.Func.SetInlinabilityChecked(true)
const maxBudget = 80
visitor := hairyVisitor{budget: maxBudget}
if visitor.visitList(fn.Nbody) {
reason = visitor.reason
return
}
if visitor.budget < 0 {
reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", maxBudget-visitor.budget, maxBudget)
return
}
n.Func.Inl = &Inline{
Cost: maxBudget - visitor.budget,
Dcl: inlcopylist(n.Name.Defn.Func.Dcl),
Body: inlcopylist(fn.Nbody.Slice()),
}
// hack, TODO, check for better way to link method nodes back to the thing with the ->inl
// this is so export can find the body of a method
fn.Type.FuncType().Nname = asTypesNode(n)
if Debug['m'] > 1 {
fmt.Printf("%v: can inline %#v as: %#v { %#v }\n", fn.Line(), n, fn.Type, asNodes(n.Func.Inl.Body))
} else if Debug['m'] != 0 {
fmt.Printf("%v: can inline %v\n", fn.Line(), n)
}
}
cmd/compile: don't export unreachable inline method bodies Previously, anytime we exported a function or method declaration (which includes methods for every type transitively exported), we included the inline function bodies, if any. However, in many cases, it's impossible (or at least very unlikely) for the importing package to call the method. For example: package p type T int func (t T) M() { t.u() } func (t T) u() {} func (t T) v() {} T.M and T.u are inlineable, and they're both reachable through calls to T.M, which is exported. However, t.v is also inlineable, but cannot be reached. Exception: if p.T is embedded in another type q.U, p.T.v will be promoted to q.U.v, and the generated wrapper function could have inlined the call to p.T.v. However, in practice, this doesn't happen, and a missed inlining opportunity doesn't affect correctness. To implement this, this CL introduces an extra flood fill pass before exporting to mark inline bodies that are actually reachable, so the exporter can skip over methods like t.v. This reduces Kubernetes build time (as measured by "time go build -a k8s.io/kubernetes/cmd/...") on an HP Z620 measurably: == before == real 0m44.658s user 11m19.136s sys 0m53.844s == after == real 0m41.702s user 10m29.732s sys 0m50.908s It also significantly cuts down the cost of enabling mid-stack inlining (-l=4): == before (-l=4) == real 1m19.236s user 20m6.528s sys 1m17.328s == after (-l=4) == real 0m59.100s user 13m12.808s sys 0m58.776s Updates #19348. Change-Id: Iade58233ca42af823a1630517a53848b5d3c7a7e Reviewed-on: https://go-review.googlesource.com/74110 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2017-10-27 15:36:59 -07:00
// inlFlood marks n's inline body for export and recursively ensures
// all called functions are marked too.
func inlFlood(n *Node) {
if n == nil {
return
}
if n.Op != ONAME || n.Class() != PFUNC {
Fatalf("inlFlood: unexpected %v, %v, %v", n, n.Op, n.Class())
}
if n.Func == nil {
Fatalf("inlFlood: missing Func on %v", n)
}
if n.Func.Inl == nil {
cmd/compile: don't export unreachable inline method bodies Previously, anytime we exported a function or method declaration (which includes methods for every type transitively exported), we included the inline function bodies, if any. However, in many cases, it's impossible (or at least very unlikely) for the importing package to call the method. For example: package p type T int func (t T) M() { t.u() } func (t T) u() {} func (t T) v() {} T.M and T.u are inlineable, and they're both reachable through calls to T.M, which is exported. However, t.v is also inlineable, but cannot be reached. Exception: if p.T is embedded in another type q.U, p.T.v will be promoted to q.U.v, and the generated wrapper function could have inlined the call to p.T.v. However, in practice, this doesn't happen, and a missed inlining opportunity doesn't affect correctness. To implement this, this CL introduces an extra flood fill pass before exporting to mark inline bodies that are actually reachable, so the exporter can skip over methods like t.v. This reduces Kubernetes build time (as measured by "time go build -a k8s.io/kubernetes/cmd/...") on an HP Z620 measurably: == before == real 0m44.658s user 11m19.136s sys 0m53.844s == after == real 0m41.702s user 10m29.732s sys 0m50.908s It also significantly cuts down the cost of enabling mid-stack inlining (-l=4): == before (-l=4) == real 1m19.236s user 20m6.528s sys 1m17.328s == after (-l=4) == real 0m59.100s user 13m12.808s sys 0m58.776s Updates #19348. Change-Id: Iade58233ca42af823a1630517a53848b5d3c7a7e Reviewed-on: https://go-review.googlesource.com/74110 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2017-10-27 15:36:59 -07:00
return
}
if n.Func.ExportInline() {
return
}
n.Func.SetExportInline(true)
typecheckinl(n)
inspectList(asNodes(n.Func.Inl.Body), func(n *Node) bool {
cmd/compile: don't export unreachable inline method bodies Previously, anytime we exported a function or method declaration (which includes methods for every type transitively exported), we included the inline function bodies, if any. However, in many cases, it's impossible (or at least very unlikely) for the importing package to call the method. For example: package p type T int func (t T) M() { t.u() } func (t T) u() {} func (t T) v() {} T.M and T.u are inlineable, and they're both reachable through calls to T.M, which is exported. However, t.v is also inlineable, but cannot be reached. Exception: if p.T is embedded in another type q.U, p.T.v will be promoted to q.U.v, and the generated wrapper function could have inlined the call to p.T.v. However, in practice, this doesn't happen, and a missed inlining opportunity doesn't affect correctness. To implement this, this CL introduces an extra flood fill pass before exporting to mark inline bodies that are actually reachable, so the exporter can skip over methods like t.v. This reduces Kubernetes build time (as measured by "time go build -a k8s.io/kubernetes/cmd/...") on an HP Z620 measurably: == before == real 0m44.658s user 11m19.136s sys 0m53.844s == after == real 0m41.702s user 10m29.732s sys 0m50.908s It also significantly cuts down the cost of enabling mid-stack inlining (-l=4): == before (-l=4) == real 1m19.236s user 20m6.528s sys 1m17.328s == after (-l=4) == real 0m59.100s user 13m12.808s sys 0m58.776s Updates #19348. Change-Id: Iade58233ca42af823a1630517a53848b5d3c7a7e Reviewed-on: https://go-review.googlesource.com/74110 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2017-10-27 15:36:59 -07:00
switch n.Op {
case ONAME:
// Mark any referenced global variables or
// functions for reexport. Skip methods,
// because they're reexported alongside their
// receiver type.
if n.Class() == PEXTERN || n.Class() == PFUNC && !n.isMethodExpression() {
exportsym(n)
}
cmd/compile: don't export unreachable inline method bodies Previously, anytime we exported a function or method declaration (which includes methods for every type transitively exported), we included the inline function bodies, if any. However, in many cases, it's impossible (or at least very unlikely) for the importing package to call the method. For example: package p type T int func (t T) M() { t.u() } func (t T) u() {} func (t T) v() {} T.M and T.u are inlineable, and they're both reachable through calls to T.M, which is exported. However, t.v is also inlineable, but cannot be reached. Exception: if p.T is embedded in another type q.U, p.T.v will be promoted to q.U.v, and the generated wrapper function could have inlined the call to p.T.v. However, in practice, this doesn't happen, and a missed inlining opportunity doesn't affect correctness. To implement this, this CL introduces an extra flood fill pass before exporting to mark inline bodies that are actually reachable, so the exporter can skip over methods like t.v. This reduces Kubernetes build time (as measured by "time go build -a k8s.io/kubernetes/cmd/...") on an HP Z620 measurably: == before == real 0m44.658s user 11m19.136s sys 0m53.844s == after == real 0m41.702s user 10m29.732s sys 0m50.908s It also significantly cuts down the cost of enabling mid-stack inlining (-l=4): == before (-l=4) == real 1m19.236s user 20m6.528s sys 1m17.328s == after (-l=4) == real 0m59.100s user 13m12.808s sys 0m58.776s Updates #19348. Change-Id: Iade58233ca42af823a1630517a53848b5d3c7a7e Reviewed-on: https://go-review.googlesource.com/74110 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2017-10-27 15:36:59 -07:00
case OCALLFUNC, OCALLMETH:
// Recursively flood any functions called by
// this one.
cmd/compile: don't export unreachable inline method bodies Previously, anytime we exported a function or method declaration (which includes methods for every type transitively exported), we included the inline function bodies, if any. However, in many cases, it's impossible (or at least very unlikely) for the importing package to call the method. For example: package p type T int func (t T) M() { t.u() } func (t T) u() {} func (t T) v() {} T.M and T.u are inlineable, and they're both reachable through calls to T.M, which is exported. However, t.v is also inlineable, but cannot be reached. Exception: if p.T is embedded in another type q.U, p.T.v will be promoted to q.U.v, and the generated wrapper function could have inlined the call to p.T.v. However, in practice, this doesn't happen, and a missed inlining opportunity doesn't affect correctness. To implement this, this CL introduces an extra flood fill pass before exporting to mark inline bodies that are actually reachable, so the exporter can skip over methods like t.v. This reduces Kubernetes build time (as measured by "time go build -a k8s.io/kubernetes/cmd/...") on an HP Z620 measurably: == before == real 0m44.658s user 11m19.136s sys 0m53.844s == after == real 0m41.702s user 10m29.732s sys 0m50.908s It also significantly cuts down the cost of enabling mid-stack inlining (-l=4): == before (-l=4) == real 1m19.236s user 20m6.528s sys 1m17.328s == after (-l=4) == real 0m59.100s user 13m12.808s sys 0m58.776s Updates #19348. Change-Id: Iade58233ca42af823a1630517a53848b5d3c7a7e Reviewed-on: https://go-review.googlesource.com/74110 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2017-10-27 15:36:59 -07:00
inlFlood(asNode(n.Left.Type.Nname()))
}
return true
})
}
// hairyVisitor visits a function body to determine its inlining
// hairiness and whether or not it can be inlined.
type hairyVisitor struct {
budget int32
reason string
}
// Look for anything we want to punt on.
func (v *hairyVisitor) visitList(ll Nodes) bool {
for _, n := range ll.Slice() {
if v.visit(n) {
return true
}
}
return false
}
func (v *hairyVisitor) visit(n *Node) bool {
if n == nil {
return false
}
switch n.Op {
// Call is okay if inlinable and we have the budget for the body.
case OCALLFUNC:
if isIntrinsicCall(n) {
v.budget--
break
}
// Functions that call runtime.getcaller{pc,sp} can not be inlined
// because getcaller{pc,sp} expect a pointer to the caller's first argument.
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if n.Left.Op == ONAME && n.Left.Class() == PFUNC && isRuntimePkg(n.Left.Sym.Pkg) {
fn := n.Left.Sym.Name
if fn == "getcallerpc" || fn == "getcallersp" {
v.reason = "call to " + fn
return true
}
}
if fn := n.Left.Func; fn != nil && fn.Inl != nil {
v.budget -= fn.Inl.Cost
break
}
if n.Left.isMethodExpression() {
if d := asNode(n.Left.Sym.Def); d != nil && d.Func.Inl != nil {
v.budget -= d.Func.Inl.Cost
break
}
}
// TODO(mdempsky): Budget for OCLOSURE calls if we
// ever allow that. See #15561 and #23093.
if Debug['l'] < 4 {
v.reason = "non-leaf function"
return true
}
// Call is okay if inlinable and we have the budget for the body.
case OCALLMETH:
t := n.Left.Type
if t == nil {
Fatalf("no function type for [%p] %+v\n", n.Left, n.Left)
}
if t.Nname() == nil {
Fatalf("no function definition for [%p] %+v\n", t, t)
}
runtime: support a two-level arena map Currently, the heap arena map is a single, large array that covers every possible arena frame in the entire address space. This is practical up to about 48 bits of address space with 64 MB arenas. However, there are two problems with this: 1. mips64, ppc64, and s390x support full 64-bit address spaces (though on Linux only s390x has kernel support for 64-bit address spaces). On these platforms, it would be good to support these larger address spaces. 2. On Windows, processes are charged for untouched memory, so for processes with small heaps, the mostly-untouched 32 MB arena map plus a 64 MB arena are significant overhead. Hence, it would be good to reduce both the arena map size and the arena size, but with a single-level arena, these are inversely proportional. This CL adds support for a two-level arena map. Arena frame numbers are now divided into arenaL1Bits of L1 index and arenaL2Bits of L2 index. At the moment, arenaL1Bits is always 0, so we effectively have a single level map. We do a few things so that this has no cost beyond the current single-level map: 1. We embed the L2 array directly in mheap, so if there's a single entry in the L2 array, the representation is identical to the current representation and there's no extra level of indirection. 2. Hot code that accesses the arena map is structured so that it optimizes to nearly the same machine code as it does currently. 3. We make some small tweaks to hot code paths and to the inliner itself to keep some important functions inlined despite their now-larger ASTs. In particular, this is necessary for heapBitsForAddr and heapBits.next. Possibly as a result of some of the tweaks, this actually slightly improves the performance of the x/benchmarks garbage benchmark: name old time/op new time/op delta Garbage/benchmem-MB=64-12 2.28ms ± 1% 2.26ms ± 1% -1.07% (p=0.000 n=17+19) (https://perf.golang.org/search?q=upload:20180223.2) For #23900. Change-Id: If5164e0961754f97eb9eca58f837f36d759505ff Reviewed-on: https://go-review.googlesource.com/96779 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rick Hudson <rlh@golang.org>
2018-02-22 20:38:09 -05:00
if isRuntimePkg(n.Left.Sym.Pkg) {
fn := n.Left.Sym.Name
if fn == "heapBits.nextArena" {
// Special case: explicitly allow
// mid-stack inlining of
// runtime.heapBits.next even though
// it calls slow-path
// runtime.heapBits.nextArena.
//
// TODO(austin): Once mid-stack
// inlining is the default, remove
// this special case.
break
}
}
if inlfn := asNode(t.FuncType().Nname).Func; inlfn.Inl != nil {
v.budget -= inlfn.Inl.Cost
break
}
if Debug['l'] < 4 {
v.reason = "non-leaf method"
return true
}
// Things that are too hairy, irrespective of the budget
case OCALL, OCALLINTER, OPANIC:
if Debug['l'] < 4 {
v.reason = "non-leaf op " + n.Op.String()
return true
}
case ORECOVER:
// recover matches the argument frame pointer to find
// the right panic value, so it needs an argument frame.
v.reason = "call to recover"
return true
case OCLOSURE,
OCALLPART,
ORANGE,
OFOR,
OFORUNTIL,
OSELECT,
OTYPESW,
OPROC,
ODEFER,
ODCLTYPE, // can't print yet
OBREAK,
ORETJMP:
v.reason = "unhandled op " + n.Op.String()
return true
case ODCLCONST, OEMPTY, OFALL, OLABEL:
// These nodes don't produce code; omit from inlining budget.
return false
case OIF:
if Isconst(n.Left, CTBOOL) {
// This if and the condition cost nothing.
return v.visitList(n.Ninit) || v.visitList(n.Nbody) ||
v.visitList(n.Rlist)
}
}
v.budget--
// TODO(mdempsky/josharian): Hacks to appease toolstash; remove.
// See issue 17566 and CL 31674 for discussion.
switch n.Op {
case OSTRUCTKEY:
v.budget--
case OSLICE, OSLICEARR, OSLICESTR:
v.budget--
case OSLICE3, OSLICE3ARR:
v.budget -= 2
}
// When debugging, don't stop early, to get full cost of inlining this function
if v.budget < 0 && Debug['m'] < 2 {
return true
}
return v.visit(n.Left) || v.visit(n.Right) ||
v.visitList(n.List) || v.visitList(n.Rlist) ||
v.visitList(n.Ninit) || v.visitList(n.Nbody)
}
// Inlcopy and inlcopylist recursively copy the body of a function.
// Any name-like node of non-local class is marked for re-export by adding it to
// the exportlist.
func inlcopylist(ll []*Node) []*Node {
s := make([]*Node, 0, len(ll))
for _, n := range ll {
s = append(s, inlcopy(n))
}
return s
}
func inlcopy(n *Node) *Node {
if n == nil {
return nil
}
switch n.Op {
case ONAME, OTYPE, OLITERAL:
return n
}
m := n.copy()
if m.Func != nil {
Fatalf("unexpected Func: %v", m)
}
m.Left = inlcopy(n.Left)
m.Right = inlcopy(n.Right)
m.List.Set(inlcopylist(n.List.Slice()))
m.Rlist.Set(inlcopylist(n.Rlist.Slice()))
m.Ninit.Set(inlcopylist(n.Ninit.Slice()))
m.Nbody.Set(inlcopylist(n.Nbody.Slice()))
return m
}
// Inlcalls/nodelist/node walks fn's statements and expressions and substitutes any
// calls made to inlineable functions. This is the external entry point.
func inlcalls(fn *Node) {
savefn := Curfn
Curfn = fn
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = inlnode(fn)
if fn != Curfn {
Fatalf("inlnode replaced curfn")
}
Curfn = savefn
}
// Turn an OINLCALL into a statement.
func inlconv2stmt(n *Node) {
n.Op = OBLOCK
// n->ninit stays
n.List.Set(n.Nbody.Slice())
n.Nbody.Set(nil)
n.Rlist.Set(nil)
}
// Turn an OINLCALL into a single valued expression.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of inlconv2expr MUST be assigned back to n, e.g.
// n.Left = inlconv2expr(n.Left)
func inlconv2expr(n *Node) *Node {
r := n.Rlist.First()
return addinit(r, append(n.Ninit.Slice(), n.Nbody.Slice()...))
}
// Turn the rlist (with the return values) of the OINLCALL in
// n into an expression list lumping the ninit and body
// containing the inlined statements on the first list element so
// order will be preserved Used in return, oas2func and call
// statements.
func inlconv2list(n *Node) []*Node {
if n.Op != OINLCALL || n.Rlist.Len() == 0 {
Fatalf("inlconv2list %+v\n", n)
}
s := n.Rlist.Slice()
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[0] = addinit(s[0], append(n.Ninit.Slice(), n.Nbody.Slice()...))
return s
}
func inlnodelist(l Nodes) {
s := l.Slice()
for i := range s {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i] = inlnode(s[i])
}
}
// inlnode recurses over the tree to find inlineable calls, which will
// be turned into OINLCALLs by mkinlcall. When the recursion comes
// back up will examine left, right, list, rlist, ninit, ntest, nincr,
// nbody and nelse and use one of the 4 inlconv/glue functions above
// to turn the OINLCALL into an expression, a statement, or patch it
// in to this nodes list or rlist as appropriate.
// NOTE it makes no sense to pass the glue functions down the
// recursion to the level where the OINLCALL gets created because they
// have to edit /this/ n, so you'd have to push that one down as well,
// but then you may as well do it here. so this is cleaner and
// shorter and less complicated.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of inlnode MUST be assigned back to n, e.g.
// n.Left = inlnode(n.Left)
func inlnode(n *Node) *Node {
if n == nil {
return n
}
switch n.Op {
// inhibit inlining of their argument
case ODEFER, OPROC:
switch n.Left.Op {
case OCALLFUNC, OCALLMETH:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.Left.SetNoInline(true)
}
return n
// TODO do them here (or earlier),
// so escape analysis can avoid more heapmoves.
case OCLOSURE:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
lno := setlineno(n)
inlnodelist(n.Ninit)
for _, n1 := range n.Ninit.Slice() {
if n1.Op == OINLCALL {
inlconv2stmt(n1)
}
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = inlnode(n.Left)
if n.Left != nil && n.Left.Op == OINLCALL {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = inlconv2expr(n.Left)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = inlnode(n.Right)
if n.Right != nil && n.Right.Op == OINLCALL {
if n.Op == OFOR || n.Op == OFORUNTIL {
inlconv2stmt(n.Right)
} else {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = inlconv2expr(n.Right)
}
}
inlnodelist(n.List)
switch n.Op {
case OBLOCK:
for _, n2 := range n.List.Slice() {
if n2.Op == OINLCALL {
inlconv2stmt(n2)
}
}
case ORETURN, OCALLFUNC, OCALLMETH, OCALLINTER, OAPPEND, OCOMPLEX:
// if we just replaced arg in f(arg()) or return arg with an inlined call
// and arg returns multiple values, glue as list
if n.List.Len() == 1 && n.List.First().Op == OINLCALL && n.List.First().Rlist.Len() > 1 {
n.List.Set(inlconv2list(n.List.First()))
break
}
fallthrough
default:
s := n.List.Slice()
for i1, n1 := range s {
if n1 != nil && n1.Op == OINLCALL {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i1] = inlconv2expr(s[i1])
}
}
}
inlnodelist(n.Rlist)
if n.Op == OAS2FUNC && n.Rlist.First().Op == OINLCALL {
n.Rlist.Set(inlconv2list(n.Rlist.First()))
n.Op = OAS2
n.SetTypecheck(0)
n = typecheck(n, Etop)
} else {
s := n.Rlist.Slice()
for i1, n1 := range s {
if n1.Op == OINLCALL {
if n.Op == OIF {
inlconv2stmt(n1)
} else {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i1] = inlconv2expr(s[i1])
}
}
}
}
inlnodelist(n.Nbody)
for _, n := range n.Nbody.Slice() {
if n.Op == OINLCALL {
inlconv2stmt(n)
}
}
// with all the branches out of the way, it is now time to
// transmogrify this node itself unless inhibited by the
// switch at the top of this function.
switch n.Op {
case OCALLFUNC, OCALLMETH:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if n.NoInline() {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
}
switch n.Op {
case OCALLFUNC:
if Debug['m'] > 3 {
fmt.Printf("%v:call to func %+v\n", n.Line(), n.Left)
}
if n.Left.Func != nil && n.Left.Func.Inl != nil && !isIntrinsicCall(n) { // normal case
n = mkinlcall(n, n.Left)
} else if n.Left.isMethodExpression() && asNode(n.Left.Sym.Def) != nil {
n = mkinlcall(n, asNode(n.Left.Sym.Def))
} else if n.Left.Op == OCLOSURE {
if f := inlinableClosure(n.Left); f != nil {
n = mkinlcall(n, f)
}
} else if n.Left.Op == ONAME && n.Left.Name != nil && n.Left.Name.Defn != nil {
if d := n.Left.Name.Defn; d.Op == OAS && d.Right.Op == OCLOSURE {
if f := inlinableClosure(d.Right); f != nil {
// NB: this check is necessary to prevent indirect re-assignment of the variable
// having the address taken after the invocation or only used for reads is actually fine
// but we have no easy way to distinguish the safe cases
if d.Left.Addrtaken() {
if Debug['m'] > 1 {
fmt.Printf("%v: cannot inline escaping closure variable %v\n", n.Line(), n.Left)
}
break
}
// ensure the variable is never re-assigned
if unsafe, a := reassigned(n.Left); unsafe {
if Debug['m'] > 1 {
if a != nil {
fmt.Printf("%v: cannot inline re-assigned closure variable at %v: %v\n", n.Line(), a.Line(), a)
} else {
fmt.Printf("%v: cannot inline global closure variable %v\n", n.Line(), n.Left)
}
}
break
}
n = mkinlcall(n, f)
}
}
}
case OCALLMETH:
if Debug['m'] > 3 {
fmt.Printf("%v:call to meth %L\n", n.Line(), n.Left.Right)
}
// typecheck should have resolved ODOTMETH->type, whose nname points to the actual function.
if n.Left.Type == nil {
Fatalf("no function type for [%p] %+v\n", n.Left, n.Left)
}
if n.Left.Type.Nname() == nil {
Fatalf("no function definition for [%p] %+v\n", n.Left.Type, n.Left.Type)
}
n = mkinlcall(n, asNode(n.Left.Type.FuncType().Nname))
}
lineno = lno
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
cmd/compile: inline closures with captures When inlining a closure with captured variables, walk up the param chain to find the one that is defined inside the scope into which the function is being inlined, and map occurrences of the captures to temporary inlvars, similarly to what is done for function parameters. No noticeable impact on compilation speed and binary size. Minor improvements to go1 benchmarks on darwin/amd64 name old time/op new time/op delta BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19) FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20) FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20) FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20) FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19) FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20) FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20) FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20) GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19) GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19) Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19) Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20) HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19) JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19) Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20) GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19) RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18) RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20) RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20) RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20) RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19) RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20) Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18) Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20) TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20) TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20) [Geo mean] 47.4µs 47.1µs -0.73% name old speed new speed delta GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19) GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19) Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19) Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20) JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19) GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19) RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19) RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20) RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20) RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20) RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19) RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20) Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18) Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20) [Geo mean] 124MB/s 126MB/s +1.54% Updates #15561 Updates #18270 Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e Reviewed-on: https://go-review.googlesource.com/72490 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-21 15:58:37 -07:00
// inlinableClosure takes an OCLOSURE node and follows linkage to the matching ONAME with
// the inlinable body. Returns nil if the function is not inlinable.
func inlinableClosure(n *Node) *Node {
c := n.Func.Closure
caninl(c)
f := c.Func.Nname
if f == nil || f.Func.Inl == nil {
cmd/compile: inline closures with captures When inlining a closure with captured variables, walk up the param chain to find the one that is defined inside the scope into which the function is being inlined, and map occurrences of the captures to temporary inlvars, similarly to what is done for function parameters. No noticeable impact on compilation speed and binary size. Minor improvements to go1 benchmarks on darwin/amd64 name old time/op new time/op delta BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19) FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20) FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20) FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20) FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19) FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20) FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20) FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20) GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19) GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19) Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19) Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20) HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19) JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19) Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20) GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19) RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18) RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20) RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20) RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20) RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19) RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20) Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18) Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20) TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20) TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20) [Geo mean] 47.4µs 47.1µs -0.73% name old speed new speed delta GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19) GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19) Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19) Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20) JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19) GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19) RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19) RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20) RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20) RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20) RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19) RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20) Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18) Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20) [Geo mean] 124MB/s 126MB/s +1.54% Updates #15561 Updates #18270 Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e Reviewed-on: https://go-review.googlesource.com/72490 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-21 15:58:37 -07:00
return nil
}
cmd/compile: inline closures with captures When inlining a closure with captured variables, walk up the param chain to find the one that is defined inside the scope into which the function is being inlined, and map occurrences of the captures to temporary inlvars, similarly to what is done for function parameters. No noticeable impact on compilation speed and binary size. Minor improvements to go1 benchmarks on darwin/amd64 name old time/op new time/op delta BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19) FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20) FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20) FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20) FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19) FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20) FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20) FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20) GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19) GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19) Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19) Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20) HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19) JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19) Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20) GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19) RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18) RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20) RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20) RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20) RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19) RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20) Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18) Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20) TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20) TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20) [Geo mean] 47.4µs 47.1µs -0.73% name old speed new speed delta GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19) GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19) Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19) Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20) JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19) GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19) RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19) RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20) RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20) RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20) RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19) RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20) Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18) Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20) [Geo mean] 124MB/s 126MB/s +1.54% Updates #15561 Updates #18270 Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e Reviewed-on: https://go-review.googlesource.com/72490 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-21 15:58:37 -07:00
return f
}
// reassigned takes an ONAME node, walks the function in which it is defined, and returns a boolean
// indicating whether the name has any assignments other than its declaration.
// The second return value is the first such assignment encountered in the walk, if any. It is mostly
// useful for -m output documenting the reason for inhibited optimizations.
// NB: global variables are always considered to be re-assigned.
// TODO: handle initial declaration not including an assignment and followed by a single assignment?
func reassigned(n *Node) (bool, *Node) {
if n.Op != ONAME {
Fatalf("reassigned %v", n)
}
// no way to reliably check for no-reassignment of globals, assume it can be
if n.Name.Curfn == nil {
return true, nil
}
f := n.Name.Curfn
// There just might be a good reason for this although this can be pretty surprising:
// local variables inside a closure have Curfn pointing to the OCLOSURE node instead
// of the corresponding ODCLFUNC.
// We need to walk the function body to check for reassignments so we follow the
// linkage to the ODCLFUNC node as that is where body is held.
if f.Op == OCLOSURE {
f = f.Func.Closure
}
v := reassignVisitor{name: n}
a := v.visitList(f.Nbody)
return a != nil, a
}
type reassignVisitor struct {
name *Node
}
func (v *reassignVisitor) visit(n *Node) *Node {
if n == nil {
return nil
}
switch n.Op {
case OAS:
if n.Left == v.name && n != v.name.Name.Defn {
return n
}
return nil
case OAS2, OAS2FUNC, OAS2MAPR, OAS2DOTTYPE:
for _, p := range n.List.Slice() {
if p == v.name && n != v.name.Name.Defn {
return n
}
}
return nil
}
if a := v.visit(n.Left); a != nil {
return a
}
if a := v.visit(n.Right); a != nil {
return a
}
if a := v.visitList(n.List); a != nil {
return a
}
if a := v.visitList(n.Rlist); a != nil {
return a
}
if a := v.visitList(n.Ninit); a != nil {
return a
}
if a := v.visitList(n.Nbody); a != nil {
return a
}
return nil
}
func (v *reassignVisitor) visitList(l Nodes) *Node {
for _, n := range l.Slice() {
if a := v.visit(n); a != nil {
return a
}
}
return nil
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of mkinlcall MUST be assigned back to n, e.g.
// n.Left = mkinlcall(n.Left, fn, isddd)
func mkinlcall(n *Node, fn *Node) *Node {
save_safemode := safemode
// imported functions may refer to unsafe as long as the
// package was marked safe during import (already checked).
pkg := fnpkg(fn)
if pkg != localpkg && pkg != nil {
safemode = false
}
n = mkinlcall1(n, fn)
safemode = save_safemode
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
func tinlvar(t *types.Field, inlvars map[*Node]*Node) *Node {
cmd/compile: replace Field.Nname.Pos with Field.Pos For struct fields and methods, Field.Nname was only used to store position information, which means we're allocating an entire ONAME Node+Name+Param structure just for one field. We can optimize away these ONAME allocations by instead adding a Field.Pos field. Unfortunately, we can't get rid of Field.Nname, because it's needed for function parameters, so Field grows a little bit and now has more redundant information in those cases. However, that was already the case (e.g., Field.Sym and Field.Nname.Sym), and it's still a net win for allocations as demonstrated by the benchmarks below. Additionally, by moving the ONAME allocation for function parameters to funcargs, we can avoid allocating them for function parameters that aren't used in corresponding function bodies (e.g., interface methods, function-typed variables, and imported functions/methods without inline bodies). name old time/op new time/op delta Template 254ms ± 6% 251ms ± 6% -1.04% (p=0.000 n=487+488) Unicode 128ms ± 7% 128ms ± 7% ~ (p=0.294 n=482+467) GoTypes 862ms ± 5% 860ms ± 4% ~ (p=0.075 n=488+471) Compiler 3.91s ± 4% 3.90s ± 4% -0.39% (p=0.000 n=468+473) name old user-time/op new user-time/op delta Template 339ms ±14% 336ms ±14% -1.02% (p=0.001 n=498+494) Unicode 176ms ±18% 176ms ±25% ~ (p=0.940 n=491+499) GoTypes 1.13s ± 8% 1.13s ± 9% ~ (p=0.157 n=496+493) Compiler 5.24s ± 6% 5.21s ± 6% -0.57% (p=0.000 n=485+489) name old alloc/op new alloc/op delta Template 38.3MB ± 0% 37.3MB ± 0% -2.58% (p=0.000 n=499+497) Unicode 29.1MB ± 0% 29.1MB ± 0% -0.03% (p=0.000 n=500+493) GoTypes 116MB ± 0% 115MB ± 0% -0.65% (p=0.000 n=498+499) Compiler 492MB ± 0% 487MB ± 0% -1.00% (p=0.000 n=497+498) name old allocs/op new allocs/op delta Template 364k ± 0% 360k ± 0% -1.15% (p=0.000 n=499+499) Unicode 336k ± 0% 336k ± 0% -0.01% (p=0.000 n=500+493) GoTypes 1.16M ± 0% 1.16M ± 0% -0.30% (p=0.000 n=499+499) Compiler 4.54M ± 0% 4.51M ± 0% -0.58% (p=0.000 n=494+495) Passes toolstash-check -gcflags=-dwarf=false. Changes DWARF output because position information is now tracked more precisely for function parameters. Change-Id: Ib8077d70d564cc448c5e4290baceab3a4396d712 Reviewed-on: https://go-review.googlesource.com/108217 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2018-04-18 22:57:10 -07:00
if n := asNode(t.Nname); n != nil && !n.isBlank() {
inlvar := inlvars[n]
if inlvar == nil {
cmd/compile: replace Field.Nname.Pos with Field.Pos For struct fields and methods, Field.Nname was only used to store position information, which means we're allocating an entire ONAME Node+Name+Param structure just for one field. We can optimize away these ONAME allocations by instead adding a Field.Pos field. Unfortunately, we can't get rid of Field.Nname, because it's needed for function parameters, so Field grows a little bit and now has more redundant information in those cases. However, that was already the case (e.g., Field.Sym and Field.Nname.Sym), and it's still a net win for allocations as demonstrated by the benchmarks below. Additionally, by moving the ONAME allocation for function parameters to funcargs, we can avoid allocating them for function parameters that aren't used in corresponding function bodies (e.g., interface methods, function-typed variables, and imported functions/methods without inline bodies). name old time/op new time/op delta Template 254ms ± 6% 251ms ± 6% -1.04% (p=0.000 n=487+488) Unicode 128ms ± 7% 128ms ± 7% ~ (p=0.294 n=482+467) GoTypes 862ms ± 5% 860ms ± 4% ~ (p=0.075 n=488+471) Compiler 3.91s ± 4% 3.90s ± 4% -0.39% (p=0.000 n=468+473) name old user-time/op new user-time/op delta Template 339ms ±14% 336ms ±14% -1.02% (p=0.001 n=498+494) Unicode 176ms ±18% 176ms ±25% ~ (p=0.940 n=491+499) GoTypes 1.13s ± 8% 1.13s ± 9% ~ (p=0.157 n=496+493) Compiler 5.24s ± 6% 5.21s ± 6% -0.57% (p=0.000 n=485+489) name old alloc/op new alloc/op delta Template 38.3MB ± 0% 37.3MB ± 0% -2.58% (p=0.000 n=499+497) Unicode 29.1MB ± 0% 29.1MB ± 0% -0.03% (p=0.000 n=500+493) GoTypes 116MB ± 0% 115MB ± 0% -0.65% (p=0.000 n=498+499) Compiler 492MB ± 0% 487MB ± 0% -1.00% (p=0.000 n=497+498) name old allocs/op new allocs/op delta Template 364k ± 0% 360k ± 0% -1.15% (p=0.000 n=499+499) Unicode 336k ± 0% 336k ± 0% -0.01% (p=0.000 n=500+493) GoTypes 1.16M ± 0% 1.16M ± 0% -0.30% (p=0.000 n=499+499) Compiler 4.54M ± 0% 4.51M ± 0% -0.58% (p=0.000 n=494+495) Passes toolstash-check -gcflags=-dwarf=false. Changes DWARF output because position information is now tracked more precisely for function parameters. Change-Id: Ib8077d70d564cc448c5e4290baceab3a4396d712 Reviewed-on: https://go-review.googlesource.com/108217 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2018-04-18 22:57:10 -07:00
Fatalf("missing inlvar for %v\n", n)
}
return inlvar
}
return typecheck(nblank, Erv|Easgn)
}
var inlgen int
// If n is a call, and fn is a function with an inlinable body,
// return an OINLCALL.
// On return ninit has the parameter assignments, the nbody is the
// inlined function body and list, rlist contain the input, output
// parameters.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of mkinlcall1 MUST be assigned back to n, e.g.
// n.Left = mkinlcall1(n.Left, fn, isddd)
func mkinlcall1(n, fn *Node) *Node {
if fn.Func.Inl == nil {
// No inlinable body.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
if fn == Curfn || fn.Name.Defn == Curfn {
// Can't recursively inline a function into itself.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
if instrumenting && isRuntimePkg(fn.Sym.Pkg) {
// Runtime package must not be instrumented.
// Instrument skips runtime package. However, some runtime code can be
// inlined into other packages and instrumented there. To avoid this,
// we disable inlining of runtime functions when instrumenting.
// The example that we observed is inlining of LockOSThread,
// which lead to false race reports on m contents.
return n
}
if Debug_typecheckinl == 0 {
typecheckinl(fn)
}
// We have a function node, and it has an inlineable body.
if Debug['m'] > 1 {
fmt.Printf("%v: inlining call to %v %#v { %#v }\n", n.Line(), fn.Sym, fn.Type, asNodes(fn.Func.Inl.Body))
} else if Debug['m'] != 0 {
fmt.Printf("%v: inlining call to %v\n", n.Line(), fn)
}
if Debug['m'] > 2 {
fmt.Printf("%v: Before inlining: %+v\n", n.Line(), n)
}
ninit := n.Ninit
cmd/compile: inline closures with captures When inlining a closure with captured variables, walk up the param chain to find the one that is defined inside the scope into which the function is being inlined, and map occurrences of the captures to temporary inlvars, similarly to what is done for function parameters. No noticeable impact on compilation speed and binary size. Minor improvements to go1 benchmarks on darwin/amd64 name old time/op new time/op delta BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19) FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20) FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20) FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20) FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19) FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20) FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20) FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20) GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19) GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19) Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19) Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20) HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19) JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19) Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20) GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19) RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18) RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20) RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20) RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20) RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19) RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20) Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18) Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20) TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20) TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20) [Geo mean] 47.4µs 47.1µs -0.73% name old speed new speed delta GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19) GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19) Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19) Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20) JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19) GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19) RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19) RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20) RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20) RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20) RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19) RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20) Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18) Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20) [Geo mean] 124MB/s 126MB/s +1.54% Updates #15561 Updates #18270 Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e Reviewed-on: https://go-review.googlesource.com/72490 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-21 15:58:37 -07:00
// Make temp names to use instead of the originals.
inlvars := make(map[*Node]*Node)
// record formals/locals for later post-processing
var inlfvars []*Node
// Handle captured variables when inlining closures.
if fn.Name.Defn != nil {
cmd/compile: inline closures with captures When inlining a closure with captured variables, walk up the param chain to find the one that is defined inside the scope into which the function is being inlined, and map occurrences of the captures to temporary inlvars, similarly to what is done for function parameters. No noticeable impact on compilation speed and binary size. Minor improvements to go1 benchmarks on darwin/amd64 name old time/op new time/op delta BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19) FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20) FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20) FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20) FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19) FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20) FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20) FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20) GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19) GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19) Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19) Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20) HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19) JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19) Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20) GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19) RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18) RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20) RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20) RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20) RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19) RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20) Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18) Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20) TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20) TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20) [Geo mean] 47.4µs 47.1µs -0.73% name old speed new speed delta GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19) GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19) Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19) Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20) JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19) GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19) RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19) RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20) RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20) RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20) RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19) RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20) Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18) Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20) [Geo mean] 124MB/s 126MB/s +1.54% Updates #15561 Updates #18270 Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e Reviewed-on: https://go-review.googlesource.com/72490 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-21 15:58:37 -07:00
if c := fn.Name.Defn.Func.Closure; c != nil {
for _, v := range c.Func.Closure.Func.Cvars.Slice() {
cmd/compile: inline closures with captures When inlining a closure with captured variables, walk up the param chain to find the one that is defined inside the scope into which the function is being inlined, and map occurrences of the captures to temporary inlvars, similarly to what is done for function parameters. No noticeable impact on compilation speed and binary size. Minor improvements to go1 benchmarks on darwin/amd64 name old time/op new time/op delta BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19) FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20) FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20) FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20) FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19) FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20) FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20) FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20) GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19) GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19) Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19) Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20) HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19) JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19) Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20) GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19) RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18) RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20) RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20) RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20) RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19) RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20) Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18) Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20) TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20) TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20) [Geo mean] 47.4µs 47.1µs -0.73% name old speed new speed delta GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19) GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19) Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19) Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20) JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20) JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19) GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19) RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20) RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20) RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19) RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20) RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20) RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20) RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19) RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20) Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18) Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20) [Geo mean] 124MB/s 126MB/s +1.54% Updates #15561 Updates #18270 Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e Reviewed-on: https://go-review.googlesource.com/72490 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-21 15:58:37 -07:00
if v.Op == OXXX {
continue
}
o := v.Name.Param.Outer
// make sure the outer param matches the inlining location
// NB: if we enabled inlining of functions containing OCLOSURE or refined
// the reassigned check via some sort of copy propagation this would most
// likely need to be changed to a loop to walk up to the correct Param
if o == nil || (o.Name.Curfn != Curfn && o.Name.Curfn.Func.Closure != Curfn) {
Fatalf("%v: unresolvable capture %v %v\n", n.Line(), fn, v)
}
if v.Name.Byval() {
iv := typecheck(inlvar(v), Erv)
ninit.Append(nod(ODCL, iv, nil))
ninit.Append(typecheck(nod(OAS, iv, o), Etop))
inlvars[v] = iv
} else {
addr := newname(lookup("&" + v.Sym.Name))
addr.Type = types.NewPtr(v.Type)
ia := typecheck(inlvar(addr), Erv)
ninit.Append(nod(ODCL, ia, nil))
ninit.Append(typecheck(nod(OAS, ia, nod(OADDR, o, nil)), Etop))
inlvars[addr] = ia
// When capturing by reference, all occurrence of the captured var
// must be substituted with dereference of the temporary address
inlvars[v] = typecheck(nod(OIND, ia, nil), Erv)
}
}
}
}
for _, ln := range fn.Func.Inl.Dcl {
if ln.Op != ONAME {
continue
}
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if ln.Class() == PPARAMOUT { // return values handled below.
continue
}
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if ln.isParamStackCopy() { // ignore the on-stack copy of a parameter that moved to the heap
continue
}
inlvars[ln] = typecheck(inlvar(ln), Erv)
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if ln.Class() == PPARAM || ln.Name.Param.Stackcopy != nil && ln.Name.Param.Stackcopy.Class() == PPARAM {
ninit.Append(nod(ODCL, inlvars[ln], nil))
}
if genDwarfInline > 0 {
inlf := inlvars[ln]
if ln.Class() == PPARAM {
inlf.SetInlFormal(true)
} else {
inlf.SetInlLocal(true)
}
inlf.Pos = ln.Pos
inlfvars = append(inlfvars, inlf)
}
}
// temporaries for return values.
var retvars []*Node
for i, t := range fn.Type.Results().Fields().Slice() {
var m *Node
cmd/compile: replace Field.Nname.Pos with Field.Pos For struct fields and methods, Field.Nname was only used to store position information, which means we're allocating an entire ONAME Node+Name+Param structure just for one field. We can optimize away these ONAME allocations by instead adding a Field.Pos field. Unfortunately, we can't get rid of Field.Nname, because it's needed for function parameters, so Field grows a little bit and now has more redundant information in those cases. However, that was already the case (e.g., Field.Sym and Field.Nname.Sym), and it's still a net win for allocations as demonstrated by the benchmarks below. Additionally, by moving the ONAME allocation for function parameters to funcargs, we can avoid allocating them for function parameters that aren't used in corresponding function bodies (e.g., interface methods, function-typed variables, and imported functions/methods without inline bodies). name old time/op new time/op delta Template 254ms ± 6% 251ms ± 6% -1.04% (p=0.000 n=487+488) Unicode 128ms ± 7% 128ms ± 7% ~ (p=0.294 n=482+467) GoTypes 862ms ± 5% 860ms ± 4% ~ (p=0.075 n=488+471) Compiler 3.91s ± 4% 3.90s ± 4% -0.39% (p=0.000 n=468+473) name old user-time/op new user-time/op delta Template 339ms ±14% 336ms ±14% -1.02% (p=0.001 n=498+494) Unicode 176ms ±18% 176ms ±25% ~ (p=0.940 n=491+499) GoTypes 1.13s ± 8% 1.13s ± 9% ~ (p=0.157 n=496+493) Compiler 5.24s ± 6% 5.21s ± 6% -0.57% (p=0.000 n=485+489) name old alloc/op new alloc/op delta Template 38.3MB ± 0% 37.3MB ± 0% -2.58% (p=0.000 n=499+497) Unicode 29.1MB ± 0% 29.1MB ± 0% -0.03% (p=0.000 n=500+493) GoTypes 116MB ± 0% 115MB ± 0% -0.65% (p=0.000 n=498+499) Compiler 492MB ± 0% 487MB ± 0% -1.00% (p=0.000 n=497+498) name old allocs/op new allocs/op delta Template 364k ± 0% 360k ± 0% -1.15% (p=0.000 n=499+499) Unicode 336k ± 0% 336k ± 0% -0.01% (p=0.000 n=500+493) GoTypes 1.16M ± 0% 1.16M ± 0% -0.30% (p=0.000 n=499+499) Compiler 4.54M ± 0% 4.51M ± 0% -0.58% (p=0.000 n=494+495) Passes toolstash-check -gcflags=-dwarf=false. Changes DWARF output because position information is now tracked more precisely for function parameters. Change-Id: Ib8077d70d564cc448c5e4290baceab3a4396d712 Reviewed-on: https://go-review.googlesource.com/108217 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2018-04-18 22:57:10 -07:00
mpos := t.Pos
if n := asNode(t.Nname); n != nil && !n.isBlank() {
m = inlvar(n)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
m = typecheck(m, Erv)
cmd/compile: replace Field.Nname.Pos with Field.Pos For struct fields and methods, Field.Nname was only used to store position information, which means we're allocating an entire ONAME Node+Name+Param structure just for one field. We can optimize away these ONAME allocations by instead adding a Field.Pos field. Unfortunately, we can't get rid of Field.Nname, because it's needed for function parameters, so Field grows a little bit and now has more redundant information in those cases. However, that was already the case (e.g., Field.Sym and Field.Nname.Sym), and it's still a net win for allocations as demonstrated by the benchmarks below. Additionally, by moving the ONAME allocation for function parameters to funcargs, we can avoid allocating them for function parameters that aren't used in corresponding function bodies (e.g., interface methods, function-typed variables, and imported functions/methods without inline bodies). name old time/op new time/op delta Template 254ms ± 6% 251ms ± 6% -1.04% (p=0.000 n=487+488) Unicode 128ms ± 7% 128ms ± 7% ~ (p=0.294 n=482+467) GoTypes 862ms ± 5% 860ms ± 4% ~ (p=0.075 n=488+471) Compiler 3.91s ± 4% 3.90s ± 4% -0.39% (p=0.000 n=468+473) name old user-time/op new user-time/op delta Template 339ms ±14% 336ms ±14% -1.02% (p=0.001 n=498+494) Unicode 176ms ±18% 176ms ±25% ~ (p=0.940 n=491+499) GoTypes 1.13s ± 8% 1.13s ± 9% ~ (p=0.157 n=496+493) Compiler 5.24s ± 6% 5.21s ± 6% -0.57% (p=0.000 n=485+489) name old alloc/op new alloc/op delta Template 38.3MB ± 0% 37.3MB ± 0% -2.58% (p=0.000 n=499+497) Unicode 29.1MB ± 0% 29.1MB ± 0% -0.03% (p=0.000 n=500+493) GoTypes 116MB ± 0% 115MB ± 0% -0.65% (p=0.000 n=498+499) Compiler 492MB ± 0% 487MB ± 0% -1.00% (p=0.000 n=497+498) name old allocs/op new allocs/op delta Template 364k ± 0% 360k ± 0% -1.15% (p=0.000 n=499+499) Unicode 336k ± 0% 336k ± 0% -0.01% (p=0.000 n=500+493) GoTypes 1.16M ± 0% 1.16M ± 0% -0.30% (p=0.000 n=499+499) Compiler 4.54M ± 0% 4.51M ± 0% -0.58% (p=0.000 n=494+495) Passes toolstash-check -gcflags=-dwarf=false. Changes DWARF output because position information is now tracked more precisely for function parameters. Change-Id: Ib8077d70d564cc448c5e4290baceab3a4396d712 Reviewed-on: https://go-review.googlesource.com/108217 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2018-04-18 22:57:10 -07:00
inlvars[n] = m
} else {
// anonymous return values, synthesize names for use in assignment that replaces return
m = retvar(t, i)
}
if genDwarfInline > 0 {
// Don't update the src.Pos on a return variable if it
// was manufactured by the inliner (e.g. "~R2"); such vars
// were not part of the original callee.
if !strings.HasPrefix(m.Sym.Name, "~R") {
m.SetInlFormal(true)
m.Pos = mpos
inlfvars = append(inlfvars, m)
}
}
ninit.Append(nod(ODCL, m, nil))
retvars = append(retvars, m)
}
// Assign arguments to the parameters' temp names.
as := nod(OAS2, nil, nil)
as.Rlist.Set(n.List.Slice())
// For non-dotted calls to variadic functions, we assign the
// variadic parameter's temp name separately.
var vas *Node
if fn.IsMethod() {
rcv := fn.Type.Recv()
if n.Left.Op == ODOTMETH {
// For x.M(...), assign x directly to the
// receiver parameter.
if n.Left.Left == nil {
Fatalf("method call without receiver: %+v", n)
}
ras := nod(OAS, tinlvar(rcv, inlvars), n.Left.Left)
ras = typecheck(ras, Etop)
ninit.Append(ras)
} else {
// For T.M(...), add the receiver parameter to
// as.List, so it's assigned by the normal
// arguments.
if as.Rlist.Len() == 0 {
Fatalf("non-method call to method without first arg: %+v", n)
}
as.List.Append(tinlvar(rcv, inlvars))
}
}
for _, param := range fn.Type.Params().Fields().Slice() {
// For ordinary parameters or variadic parameters in
// dotted calls, just add the variable to the
// assignment list, and we're done.
if !param.Isddd() || n.Isddd() {
as.List.Append(tinlvar(param, inlvars))
continue
}
// Otherwise, we need to collect the remaining values
// to pass as a slice.
numvals := n.List.Len()
if numvals == 1 && n.List.First().Type.IsFuncArgStruct() {
numvals = n.List.First().Type.NumFields()
}
x := as.List.Len()
for as.List.Len() < numvals {
as.List.Append(argvar(param.Type, as.List.Len()))
}
varargs := as.List.Slice()[x:]
vas = nod(OAS, tinlvar(param, inlvars), nil)
if len(varargs) == 0 {
vas.Right = nodnil()
vas.Right.Type = param.Type
} else {
vas.Right = nod(OCOMPLIT, nil, typenod(param.Type))
vas.Right.List.Set(varargs)
}
}
if as.Rlist.Len() != 0 {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
as = typecheck(as, Etop)
ninit.Append(as)
}
if vas != nil {
vas = typecheck(vas, Etop)
ninit.Append(vas)
}
// Zero the return parameters.
for _, n := range retvars {
ras := nod(OAS, n, nil)
ras = typecheck(ras, Etop)
ninit.Append(ras)
}
retlabel := autolabel(".i")
inlgen++
parent := -1
if b := Ctxt.PosTable.Pos(n.Pos).Base(); b != nil {
parent = b.InliningIndex()
}
newIndex := Ctxt.InlTree.Add(parent, n.Pos, fn.Sym.Linksym())
if genDwarfInline > 0 {
if !fn.Sym.Linksym().WasInlined() {
Ctxt.DwFixups.SetPrecursorFunc(fn.Sym.Linksym(), fn)
fn.Sym.Linksym().Set(obj.AttrWasInlined, true)
}
}
subst := inlsubst{
retlabel: retlabel,
retvars: retvars,
inlvars: inlvars,
bases: make(map[*src.PosBase]*src.PosBase),
newInlIndex: newIndex,
}
body := subst.list(asNodes(fn.Func.Inl.Body))
lab := nod(OLABEL, retlabel, nil)
body = append(body, lab)
typecheckslice(body, Etop)
if genDwarfInline > 0 {
for _, v := range inlfvars {
v.Pos = subst.updatedPos(v.Pos)
}
}
//dumplist("ninit post", ninit);
call := nod(OINLCALL, nil, nil)
call.Ninit.Set(ninit.Slice())
call.Nbody.Set(body)
call.Rlist.Set(retvars)
call.Type = n.Type
call.SetTypecheck(1)
// transitive inlining
// might be nice to do this before exporting the body,
// but can't emit the body with inlining expanded.
// instead we emit the things that the body needs
// and each use must redo the inlining.
// luckily these are small.
inlnodelist(call.Nbody)
for _, n := range call.Nbody.Slice() {
if n.Op == OINLCALL {
inlconv2stmt(n)
}
}
if Debug['m'] > 2 {
fmt.Printf("%v: After inlining %+v\n\n", call.Line(), call)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return call
}
// Every time we expand a function we generate a new set of tmpnames,
// PAUTO's in the calling functions, and link them off of the
// PPARAM's, PAUTOS and PPARAMOUTs of the called function.
func inlvar(var_ *Node) *Node {
if Debug['m'] > 3 {
fmt.Printf("inlvar %+v\n", var_)
}
n := newname(var_.Sym)
n.Type = var_.Type
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
n.SetClass(PAUTO)
n.Name.SetUsed(true)
n.Name.Curfn = Curfn // the calling function, not the called one
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.SetAddrtaken(var_.Addrtaken())
Curfn.Func.Dcl = append(Curfn.Func.Dcl, n)
return n
}
// Synthesize a variable to store the inlined function's results in.
func retvar(t *types.Field, i int) *Node {
n := newname(lookupN("~R", i))
n.Type = t.Type
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
n.SetClass(PAUTO)
n.Name.SetUsed(true)
n.Name.Curfn = Curfn // the calling function, not the called one
Curfn.Func.Dcl = append(Curfn.Func.Dcl, n)
return n
}
// Synthesize a variable to store the inlined function's arguments
// when they come from a multiple return call.
func argvar(t *types.Type, i int) *Node {
n := newname(lookupN("~arg", i))
n.Type = t.Elem()
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
n.SetClass(PAUTO)
n.Name.SetUsed(true)
n.Name.Curfn = Curfn // the calling function, not the called one
Curfn.Func.Dcl = append(Curfn.Func.Dcl, n)
return n
}
// The inlsubst type implements the actual inlining of a single
// function call.
type inlsubst struct {
// Target of the goto substituted in place of a return.
retlabel *Node
// Temporary result variables.
retvars []*Node
inlvars map[*Node]*Node
// bases maps from original PosBase to PosBase with an extra
// inlined call frame.
bases map[*src.PosBase]*src.PosBase
// newInlIndex is the index of the inlined call frame to
// insert for inlined nodes.
newInlIndex int
}
// list inlines a list of nodes.
func (subst *inlsubst) list(ll Nodes) []*Node {
s := make([]*Node, 0, ll.Len())
for _, n := range ll.Slice() {
s = append(s, subst.node(n))
}
return s
}
// node recursively copies a node from the saved pristine body of the
// inlined function, substituting references to input/output
// parameters with ones to the tmpnames, and substituting returns with
// assignments to the output.
func (subst *inlsubst) node(n *Node) *Node {
if n == nil {
return nil
}
switch n.Op {
case ONAME:
if inlvar := subst.inlvars[n]; inlvar != nil { // These will be set during inlnode
if Debug['m'] > 2 {
fmt.Printf("substituting name %+v -> %+v\n", n, inlvar)
}
return inlvar
}
if Debug['m'] > 2 {
fmt.Printf("not substituting name %+v\n", n)
}
return n
case OLITERAL, OTYPE:
// If n is a named constant or type, we can continue
// using it in the inline copy. Otherwise, make a copy
// so we can update the line number.
if n.Sym != nil {
return n
}
// Since we don't handle bodies with closures, this return is guaranteed to belong to the current inlined function.
// dump("Return before substitution", n);
case ORETURN:
m := nod(OGOTO, subst.retlabel, nil)
m.Ninit.Set(subst.list(n.Ninit))
if len(subst.retvars) != 0 && n.List.Len() != 0 {
as := nod(OAS2, nil, nil)
// Make a shallow copy of retvars.
// Otherwise OINLCALL.Rlist will be the same list,
// and later walk and typecheck may clobber it.
for _, n := range subst.retvars {
as.List.Append(n)
}
as.Rlist.Set(subst.list(n.List))
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
as = typecheck(as, Etop)
m.Ninit.Append(as)
}
typecheckslice(m.Ninit.Slice(), Etop)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
m = typecheck(m, Etop)
// dump("Return after substitution", m);
return m
case OGOTO, OLABEL:
m := n.copy()
m.Pos = subst.updatedPos(m.Pos)
m.Ninit.Set(nil)
p := fmt.Sprintf("%s·%d", n.Left.Sym.Name, inlgen)
m.Left = newname(lookup(p))
return m
}
m := n.copy()
m.Pos = subst.updatedPos(m.Pos)
m.Ninit.Set(nil)
if n.Op == OCLOSURE {
Fatalf("cannot inline function containing closure: %+v", n)
}
m.Left = subst.node(n.Left)
m.Right = subst.node(n.Right)
m.List.Set(subst.list(n.List))
m.Rlist.Set(subst.list(n.Rlist))
m.Ninit.Set(append(m.Ninit.Slice(), subst.list(n.Ninit)...))
m.Nbody.Set(subst.list(n.Nbody))
return m
}
func (subst *inlsubst) updatedPos(xpos src.XPos) src.XPos {
pos := Ctxt.PosTable.Pos(xpos)
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
oldbase := pos.Base() // can be nil
newbase := subst.bases[oldbase]
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
if newbase == nil {
newbase = src.NewInliningBase(oldbase, subst.newInlIndex)
subst.bases[oldbase] = newbase
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
}
pos.SetBase(newbase)
return Ctxt.PosTable.XPos(pos)
}