go/src/cmd/compile/internal/gc/walk.go

3906 lines
94 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import (
"cmd/compile/internal/types"
"cmd/internal/objabi"
"cmd/internal/sys"
"fmt"
"strings"
)
// The constant is known to runtime.
const (
tmpstringbufsize = 32
)
func walk(fn *Node) {
Curfn = fn
if Debug['W'] != 0 {
s := fmt.Sprintf("\nbefore %v", Curfn.Func.Nname.Sym)
dumplist(s, Curfn.Nbody)
}
lno := lineno
// Final typecheck for any unused variables.
for i, ln := range fn.Func.Dcl {
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if ln.Op == ONAME && (ln.Class() == PAUTO || ln.Class() == PAUTOHEAP) {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
ln = typecheck(ln, Erv|Easgn)
fn.Func.Dcl[i] = ln
}
}
// Propagate the used flag for typeswitch variables up to the NONAME in it's definition.
for _, ln := range fn.Func.Dcl {
if ln.Op == ONAME && (ln.Class() == PAUTO || ln.Class() == PAUTOHEAP) && ln.Name.Defn != nil && ln.Name.Defn.Op == OTYPESW && ln.Name.Used() {
ln.Name.Defn.Left.Name.SetUsed(true)
}
}
for _, ln := range fn.Func.Dcl {
if ln.Op != ONAME || (ln.Class() != PAUTO && ln.Class() != PAUTOHEAP) || ln.Sym.Name[0] == '&' || ln.Name.Used() {
continue
}
if defn := ln.Name.Defn; defn != nil && defn.Op == OTYPESW {
if defn.Left.Name.Used() {
continue
}
yyerrorl(defn.Left.Pos, "%v declared and not used", ln.Sym)
defn.Left.Name.SetUsed(true) // suppress repeats
} else {
yyerrorl(ln.Pos, "%v declared and not used", ln.Sym)
}
}
lineno = lno
if nerrors != 0 {
return
}
walkstmtlist(Curfn.Nbody.Slice())
if Debug['W'] != 0 {
s := fmt.Sprintf("after walk %v", Curfn.Func.Nname.Sym)
dumplist(s, Curfn.Nbody)
}
zeroResults()
heapmoves()
if Debug['W'] != 0 && Curfn.Func.Enter.Len() > 0 {
s := fmt.Sprintf("enter %v", Curfn.Func.Nname.Sym)
dumplist(s, Curfn.Func.Enter)
}
}
func walkstmtlist(s []*Node) {
for i := range s {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i] = walkstmt(s[i])
}
}
func samelist(a, b []*Node) bool {
if len(a) != len(b) {
return false
}
for i, n := range a {
if n != b[i] {
return false
}
}
return true
}
func paramoutheap(fn *Node) bool {
for _, ln := range fn.Func.Dcl {
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch ln.Class() {
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
case PPARAMOUT:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if ln.isParamStackCopy() || ln.Addrtaken() {
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
return true
}
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
case PAUTO:
// stop early - parameters are over
return false
}
}
return false
}
// adds "adjust" to all the argument locations for the call n.
// n must be a defer or go node that has already been walked.
func adjustargs(n *Node, adjust int) {
callfunc := n.Left
for _, arg := range callfunc.List.Slice() {
if arg.Op != OAS {
Fatalf("call arg not assignment")
}
lhs := arg.Left
if lhs.Op == ONAME {
// This is a temporary introduced by reorder1.
// The real store to the stack appears later in the arg list.
continue
}
if lhs.Op != OINDREGSP {
Fatalf("call argument store does not use OINDREGSP")
}
// can't really check this in machine-indep code.
//if(lhs->val.u.reg != D_SP)
// Fatalf("call arg assign not indreg(SP)")
lhs.Xoffset += int64(adjust)
}
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of walkstmt MUST be assigned back to n, e.g.
// n.Left = walkstmt(n.Left)
func walkstmt(n *Node) *Node {
if n == nil {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
setlineno(n)
walkstmtlist(n.Ninit.Slice())
switch n.Op {
default:
if n.Op == ONAME {
yyerror("%v is not a top level statement", n.Sym)
} else {
yyerror("%v is not a top level statement", n.Op)
}
Dump("nottop", n)
case OAS,
OASOP,
OAS2,
OAS2DOTTYPE,
OAS2RECV,
OAS2FUNC,
OAS2MAPR,
OCLOSE,
OCOPY,
OCALLMETH,
OCALLINTER,
OCALL,
OCALLFUNC,
ODELETE,
OSEND,
OPRINT,
OPRINTN,
OPANIC,
OEMPTY,
cmd/internal/gc: inline runtime.getg This more closely restores what the old C runtime did. (In C, g was an 'extern register' with the same effective implementation as in this CL.) On a late 2012 MacBookPro10,2, best of 5 old vs best of 5 new: benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4981312777 4463426605 -10.40% BenchmarkFannkuch11 3046495712 3006819428 -1.30% BenchmarkFmtFprintfEmpty 89.3 79.8 -10.64% BenchmarkFmtFprintfString 284 262 -7.75% BenchmarkFmtFprintfInt 282 262 -7.09% BenchmarkFmtFprintfIntInt 480 448 -6.67% BenchmarkFmtFprintfPrefixedInt 382 358 -6.28% BenchmarkFmtFprintfFloat 529 486 -8.13% BenchmarkFmtManyArgs 1849 1773 -4.11% BenchmarkGobDecode 12835963 11794385 -8.11% BenchmarkGobEncode 10527170 10288422 -2.27% BenchmarkGzip 436109569 438422516 +0.53% BenchmarkGunzip 110121663 109843648 -0.25% BenchmarkHTTPClientServer 81930 85446 +4.29% BenchmarkJSONEncode 24638574 24280603 -1.45% BenchmarkJSONDecode 93022423 85753546 -7.81% BenchmarkMandelbrot200 4703899 4735407 +0.67% BenchmarkGoParse 5319853 5086843 -4.38% BenchmarkRegexpMatchEasy0_32 151 151 +0.00% BenchmarkRegexpMatchEasy0_1K 452 453 +0.22% BenchmarkRegexpMatchEasy1_32 131 132 +0.76% BenchmarkRegexpMatchEasy1_1K 761 722 -5.12% BenchmarkRegexpMatchMedium_32 228 224 -1.75% BenchmarkRegexpMatchMedium_1K 63751 64296 +0.85% BenchmarkRegexpMatchHard_32 3188 3238 +1.57% BenchmarkRegexpMatchHard_1K 95396 96756 +1.43% BenchmarkRevcomp 661587262 687107364 +3.86% BenchmarkTemplate 108312598 104008540 -3.97% BenchmarkTimeParse 453 459 +1.32% BenchmarkTimeFormat 475 441 -7.16% The garbage benchmark from the benchmarks subrepo gets 2.6% faster as well. Change-Id: I320aeda332db81012688b26ffab23f6581c59cfa Reviewed-on: https://go-review.googlesource.com/8460 Reviewed-by: Rick Hudson <rlh@golang.org> Run-TryBot: Rick Hudson <rlh@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2015-04-03 12:23:28 -04:00
ORECOVER,
OGETG:
if n.Typecheck() == 0 {
Fatalf("missing typecheck: %+v", n)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
wascopy := n.Op == OCOPY
init := n.Ninit
n.Ninit.Set(nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = walkexpr(n, &init)
n = addinit(n, init.Slice())
if wascopy && n.Op == OCONVNOP {
n.Op = OEMPTY // don't leave plain values as statements.
}
// special case for a receive where we throw away
// the value received.
case ORECV:
if n.Typecheck() == 0 {
Fatalf("missing typecheck: %+v", n)
}
init := n.Ninit
n.Ninit.Set(nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, &init)
n = mkcall1(chanfn("chanrecv1", 2, n.Left.Type), nil, &init, n.Left, nodnil())
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = walkexpr(n, &init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = addinit(n, init.Slice())
case OBREAK,
OCONTINUE,
OFALL,
OGOTO,
OLABEL,
ODCLCONST,
ODCLTYPE,
OCHECKNIL,
cmd/compile: recognize Syscall-like functions for liveness analysis Consider this code: func f(*int) func g() { p := new(int) f(p) } where f is an assembly function. In general liveness analysis assumes that during the call to f, p is dead in this frame. If f has retained p, p will be found alive in f's frame and keep the new(int) from being garbage collected. This is all correct and works. We use the Go func declaration for f to give the assembly function liveness information (the arguments are assumed live for the entire call). Now consider this code: func h1() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) } Here syscall.Syscall is taking the place of f, but because its arguments are uintptr, the liveness analysis and the garbage collector ignore them. Since p is no longer live in h once the call starts, if the garbage collector scans the stack while the system call is blocked, it will find no reference to the new(int) and reclaim it. If the kernel is going to write to *p once the call finishes, reclaiming the memory is a mistake. We can't change the arguments or the liveness information for syscall.Syscall itself, both for compatibility and because sometimes the arguments really are integers, and the garbage collector will get quite upset if it finds an integer where it expects a pointer. The problem is that these arguments are fundamentally untyped. The solution we have taken in the syscall package's wrappers in past releases is to insert a call to a dummy function named "use", to make it look like the argument is live during the call to syscall.Syscall: func h2() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) use(unsafe.Pointer(p)) } Keeping p alive during the call means that if the garbage collector scans the stack during the system call now, it will find the reference to p. Unfortunately, this approach is not available to users outside syscall, because 'use' is unexported, and people also have to realize they need to use it and do so. There is much existing code using syscall.Syscall without a 'use'-like function. That code will fail very occasionally in mysterious ways (see #13372). This CL fixes all that existing code by making the compiler do the right thing automatically, without any code modifications. That is, it takes h1 above, which is incorrect code today, and makes it correct code. Specifically, if the compiler sees a foreign func definition (one without a body) that has uintptr arguments, it marks those arguments as "unsafe uintptrs". If it later sees the function being called with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x as having escaped, and it makes sure to hold x in a live temporary variable until the call returns, so that the garbage collector cannot reclaim whatever heap memory x points to. For now I am leaving the explicit calls to use in package syscall, but they can be removed early in a future cycle (likely Go 1.7). The rule has no effect on escape analysis, only on liveness analysis. Fixes #13372. Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500 Reviewed-on: https://go-review.googlesource.com/18584 Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
OVARKILL,
OVARLIVE:
break
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
case ODCL:
v := n.Left
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if v.Class() == PAUTOHEAP {
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if compiling_runtime {
yyerror("%v escapes to heap, not allowed in runtime.", v)
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
}
if prealloc[v] == nil {
prealloc[v] = callnew(v.Type)
}
nn := nod(OAS, v.Name.Param.Heapaddr, prealloc[v])
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
nn.SetColas(true)
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
nn = typecheck(nn, Etop)
return walkstmt(nn)
}
case OBLOCK:
walkstmtlist(n.List.Slice())
case OXCASE:
yyerror("case statement out of place")
n.Op = OCASE
fallthrough
case OCASE:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = walkstmt(n.Right)
case ODEFER:
Curfn.Func.SetHasDefer(true)
switch n.Left.Op {
case OPRINT, OPRINTN:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkprintfunc(n.Left, &n.Ninit)
case OCOPY:
n.Left = copyany(n.Left, &n.Ninit, true)
default:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, &n.Ninit)
}
// make room for size & fn arguments.
adjustargs(n, 2*Widthptr)
case OFOR, OFORUNTIL:
if n.Left != nil {
walkstmtlist(n.Left.Ninit.Slice())
init := n.Left.Ninit
n.Left.Ninit.Set(nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, &init)
n.Left = addinit(n.Left, init.Slice())
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = walkstmt(n.Right)
walkstmtlist(n.Nbody.Slice())
case OIF:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, &n.Ninit)
walkstmtlist(n.Nbody.Slice())
walkstmtlist(n.Rlist.Slice())
case OPROC:
switch n.Left.Op {
case OPRINT, OPRINTN:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkprintfunc(n.Left, &n.Ninit)
case OCOPY:
n.Left = copyany(n.Left, &n.Ninit, true)
default:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, &n.Ninit)
}
// make room for size & fn arguments.
adjustargs(n, 2*Widthptr)
case ORETURN:
walkexprlist(n.List.Slice(), &n.Ninit)
if n.List.Len() == 0 {
break
}
cmd/compile: shrink gc.Type in half Many of Type's fields are etype-specific. This CL organizes them into their own auxiliary types, duplicating a few fields as necessary, and adds an Extra field to hold them. It also sorts the remaining fields for better struct packing. It also improves documentation for most fields. This reduces the size of Type at the cost of some extra allocations. There's no CPU impact; memory impact below. It also makes the natural structure of Type clearer. Passes toolstash -cmp on all architectures. Ideas for future work in this vein: (1) Width and Align probably only need to be stored for Struct and Array types. The refactoring to accomplish this would hopefully also eliminate TFUNCARGS and TCHANARGS entirely. (2) Maplineno is sparsely used and could probably better be stored in a separate map[*Type]int32, with mapqueue updated to store both a Node and a line number. (3) The Printed field may be removable once the old (non-binary) importer/exported has been removed. (4) StructType's fields field could be changed from *[]*Field to []*Field, which would remove a common allocation. (5) I believe that Type.Nod can be moved to ForwardType. Separate CL. name old alloc/op new alloc/op delta Template 57.9MB ± 0% 55.9MB ± 0% -3.43% (p=0.000 n=50+50) Unicode 38.3MB ± 0% 37.8MB ± 0% -1.39% (p=0.000 n=50+50) GoTypes 185MB ± 0% 180MB ± 0% -2.56% (p=0.000 n=50+50) Compiler 824MB ± 0% 806MB ± 0% -2.19% (p=0.000 n=50+50) name old allocs/op new allocs/op delta Template 486k ± 0% 497k ± 0% +2.25% (p=0.000 n=50+50) Unicode 377k ± 0% 379k ± 0% +0.55% (p=0.000 n=50+50) GoTypes 1.39M ± 0% 1.42M ± 0% +1.63% (p=0.000 n=50+50) Compiler 5.52M ± 0% 5.57M ± 0% +0.84% (p=0.000 n=47+50) Change-Id: I828488eeb74902b013d5ae4cf844de0b6c0dfc87 Reviewed-on: https://go-review.googlesource.com/21611 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-04-01 20:11:30 -07:00
if (Curfn.Type.FuncType().Outnamed && n.List.Len() > 1) || paramoutheap(Curfn) {
// assign to the function out parameters,
// so that reorder3 can fix up conflicts
var rl []*Node
for _, ln := range Curfn.Func.Dcl {
cl := ln.Class()
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if cl == PAUTO || cl == PAUTOHEAP {
break
}
if cl == PPARAMOUT {
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if ln.isParamStackCopy() {
ln = walkexpr(typecheck(nod(OIND, ln.Name.Param.Heapaddr, nil), Erv), nil)
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
}
rl = append(rl, ln)
}
}
if got, want := n.List.Len(), len(rl); got != want {
// order should have rewritten multi-value function calls
// with explicit OAS2FUNC nodes.
Fatalf("expected %v return arguments, have %v", want, got)
}
if samelist(rl, n.List.Slice()) {
// special return in disguise
n.List.Set(nil)
break
}
// move function calls out, to make reorder3's job easier.
walkexprlistsafe(n.List.Slice(), &n.Ninit)
ll := ascompatee(n.Op, rl, n.List.Slice(), &n.Ninit)
n.List.Set(reorder3(ll))
break
}
ll := ascompatte(nil, false, Curfn.Type.Results(), n.List.Slice(), 1, &n.Ninit)
n.List.Set(ll)
case ORETJMP:
break
case OSELECT:
walkselect(n)
case OSWITCH:
walkswitch(n)
case ORANGE:
n = walkrange(n)
}
if n.Op == ONAME {
Fatalf("walkstmt ended up with name: %+v", n)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
func isSmallMakeSlice(n *Node) bool {
if n.Op != OMAKESLICE {
return false
}
l := n.Left
r := n.Right
if r == nil {
r = l
}
t := n.Type
return smallintconst(l) && smallintconst(r) && (t.Elem().Width == 0 || r.Int64() < (1<<16)/t.Elem().Width)
}
// walk the whole tree of the body of an
// expression or simple statement.
// the types expressions are calculated.
// compile-time constants are evaluated.
// complex side effects like statements are appended to init
func walkexprlist(s []*Node, init *Nodes) {
for i := range s {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i] = walkexpr(s[i], init)
}
}
func walkexprlistsafe(s []*Node, init *Nodes) {
for i, n := range s {
s[i] = safeexpr(n, init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i] = walkexpr(s[i], init)
}
}
func walkexprlistcheap(s []*Node, init *Nodes) {
for i, n := range s {
s[i] = cheapexpr(n, init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
s[i] = walkexpr(s[i], init)
}
}
// Build name of function for interface conversion.
// Not all names are possible
// (e.g., we'll never generate convE2E or convE2I or convI2E).
func convFuncName(from, to *types.Type) string {
tkind := to.Tie()
switch from.Tie() {
case 'I':
switch tkind {
case 'I':
return "convI2I"
}
case 'T':
switch tkind {
case 'E':
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
switch {
case from.Size() == 2 && from.Align == 2:
return "convT2E16"
case from.Size() == 4 && from.Align == 4 && !types.Haspointers(from):
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
return "convT2E32"
case from.Size() == 8 && from.Align == types.Types[TUINT64].Align && !types.Haspointers(from):
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
return "convT2E64"
case from.IsString():
return "convT2Estring"
case from.IsSlice():
return "convT2Eslice"
case !types.Haspointers(from):
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
return "convT2Enoptr"
}
return "convT2E"
case 'I':
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
switch {
case from.Size() == 2 && from.Align == 2:
return "convT2I16"
case from.Size() == 4 && from.Align == 4 && !types.Haspointers(from):
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
return "convT2I32"
case from.Size() == 8 && from.Align == types.Types[TUINT64].Align && !types.Haspointers(from):
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
return "convT2I64"
case from.IsString():
return "convT2Istring"
case from.IsSlice():
return "convT2Islice"
case !types.Haspointers(from):
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
return "convT2Inoptr"
}
return "convT2I"
}
}
Fatalf("unknown conv func %c2%c", from.Tie(), to.Tie())
panic("unreachable")
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of walkexpr MUST be assigned back to n, e.g.
// n.Left = walkexpr(n.Left, init)
func walkexpr(n *Node, init *Nodes) *Node {
if n == nil {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
// Eagerly checkwidth all expressions for the back end.
if n.Type != nil && !n.Type.WidthCalculated() {
switch n.Type.Etype {
case TBLANK, TNIL, TIDEAL:
default:
checkwidth(n.Type)
}
}
if init == &n.Ninit {
// not okay to use n->ninit when walking n,
// because we might replace n with some other node
// and would lose the init list.
Fatalf("walkexpr init == &n->ninit")
}
if n.Ninit.Len() != 0 {
walkstmtlist(n.Ninit.Slice())
init.AppendNodes(&n.Ninit)
}
lno := setlineno(n)
if Debug['w'] > 1 {
Dump("walk-before", n)
}
if n.Typecheck() != 1 {
Fatalf("missed typecheck: %+v", n)
}
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if n.Op == ONAME && n.Class() == PAUTOHEAP {
nn := nod(OIND, n.Name.Param.Heapaddr, nil)
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
nn = typecheck(nn, Erv)
nn = walkexpr(nn, init)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
nn.Left.SetNonNil(true)
return nn
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
}
opswitch:
switch n.Op {
default:
Dump("walk", n)
Fatalf("walkexpr: switch 1 unknown op %+S", n)
case ONONAME, OINDREGSP, OEMPTY, OGETG:
case OTYPE, ONAME, OLITERAL:
// TODO(mdempsky): Just return n; see discussion on CL 38655.
// Perhaps refactor to use Node.mayBeShared for these instead.
// If these return early, make sure to still call
// stringsym for constant strings.
case ONOT, OMINUS, OPLUS, OCOM, OREAL, OIMAG, ODOTMETH, ODOTINTER,
OIND, OSPTR, OITAB, OIDATA, OADDR:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
case OEFACE, OAND, OSUB, OMUL, OLT, OLE, OGE, OGT, OADD, OOR, OXOR:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
case ODOT:
usefield(n)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
case ODOTTYPE, ODOTTYPE2:
n.Left = walkexpr(n.Left, init)
// Set up interface type addresses for back end.
n.Right = typename(n.Type)
if n.Op == ODOTTYPE {
n.Right.Right = typename(n.Left.Type)
}
cmd/compile: evaluate itabname during walk instead of SSA For backend concurrency safety. Follow-up to CL 38721. This does introduce a Nodes where there wasn't one before, but these are so rare that the performance impact is negligible. Does not pass toolstash-check, but the only change is line numbers, and the new line numbers appear preferable. Updates #15756 name old alloc/op new alloc/op delta Template 39.9MB ± 0% 39.9MB ± 0% ~ (p=0.841 n=5+5) Unicode 29.8MB ± 0% 29.8MB ± 0% ~ (p=0.690 n=5+5) GoTypes 113MB ± 0% 113MB ± 0% +0.09% (p=0.008 n=5+5) SSA 854MB ± 0% 855MB ± 0% ~ (p=0.222 n=5+5) Flate 25.3MB ± 0% 25.3MB ± 0% ~ (p=0.690 n=5+5) GoParser 31.8MB ± 0% 31.9MB ± 0% ~ (p=0.421 n=5+5) Reflect 78.2MB ± 0% 78.3MB ± 0% ~ (p=0.548 n=5+5) Tar 26.7MB ± 0% 26.7MB ± 0% ~ (p=0.690 n=5+5) XML 42.3MB ± 0% 42.3MB ± 0% ~ (p=0.222 n=5+5) name old allocs/op new allocs/op delta Template 391k ± 1% 391k ± 0% ~ (p=0.841 n=5+5) Unicode 320k ± 0% 320k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% +0.26% (p=0.008 n=5+5) SSA 7.60M ± 0% 7.60M ± 0% ~ (p=0.548 n=5+5) Flate 234k ± 0% 234k ± 1% ~ (p=1.000 n=5+5) GoParser 316k ± 1% 317k ± 0% ~ (p=0.841 n=5+5) Reflect 979k ± 0% 980k ± 0% ~ (p=0.690 n=5+5) Tar 251k ± 1% 251k ± 0% ~ (p=0.595 n=5+5) XML 394k ± 0% 393k ± 0% ~ (p=0.222 n=5+5) Change-Id: I237ae5502db4560f78ce021dc62f6d289797afd6 Reviewed-on: https://go-review.googlesource.com/39197 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2017-03-28 13:52:33 -07:00
if !n.Type.IsInterface() && !n.Left.Type.IsEmptyInterface() {
n.List.Set1(itabname(n.Type, n.Left.Type))
}
case ODOTPTR:
usefield(n)
if n.Op == ODOTPTR && n.Left.Type.Elem().Width == 0 {
// No actual copy will be generated, so emit an explicit nil check.
n.Left = cheapexpr(n.Left, init)
checknil(n.Left, init)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
case OLEN, OCAP:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
// replace len(*[10]int) with 10.
// delayed until now to preserve side effects.
t := n.Left.Type
if t.IsPtr() {
t = t.Elem()
}
if t.IsArray() {
safeexpr(n.Left, init)
nodconst(n, n.Type, t.NumElem())
n.SetTypecheck(1)
}
case OLSH, ORSH:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
t := n.Left.Type
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.SetBounded(bounded(n.Right, 8*t.Width))
if Debug['m'] != 0 && n.Etype != 0 && !Isconst(n.Right, CTINT) {
Warn("shift bounds check elided")
}
case OCOMPLEX:
// Use results from call expression as arguments for complex.
if n.Left == nil && n.Right == nil {
n.Left = n.List.First()
n.Right = n.List.Second()
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
case OEQ, ONE:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
// Disable safemode while compiling this code: the code we
// generate internally can refer to unsafe.Pointer.
// In this case it can happen if we need to generate an ==
// for a struct containing a reflect.Value, which itself has
// an unexported field of type unsafe.Pointer.
old_safemode := safemode
safemode = false
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = walkcompare(n, init)
safemode = old_safemode
case OANDAND, OOROR:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
// cannot put side effects from n.Right on init,
// because they cannot run before n.Left is checked.
// save elsewhere and store on the eventual n.Right.
var ll Nodes
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = walkexpr(n.Right, &ll)
n.Right = addinit(n.Right, ll.Slice())
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
n = walkinrange(n, init)
case OPRINT, OPRINTN:
walkexprlist(n.List.Slice(), init)
n = walkprint(n, init)
case OPANIC:
n = mkcall("gopanic", nil, init, n.Left)
case ORECOVER:
n = mkcall("gorecover", n.Type, init, nod(OADDR, nodfp, nil))
case OCLOSUREVAR, OCFUNC:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.SetAddable(true)
case OCALLINTER:
usemethod(n)
t := n.Left.Type
if n.List.Len() != 0 && n.List.First().Op == OAS {
break
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
walkexprlist(n.List.Slice(), init)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
ll := ascompatte(n, n.Isddd(), t.Params(), n.List.Slice(), 0, init)
n.List.Set(reorder1(ll))
case OCALLFUNC:
if n.Left.Op == OCLOSURE {
// Transform direct call of a closure to call of a normal function.
// transformclosure already did all preparation work.
// Prepend captured variables to argument list.
n.List.Prepend(n.Left.Func.Enter.Slice()...)
n.Left.Func.Enter.Set(nil)
// Replace OCLOSURE with ONAME/PFUNC.
n.Left = n.Left.Func.Closure.Func.Nname
// Update type of OCALLFUNC node.
// Output arguments had not changed, but their offsets could.
if n.Left.Type.NumResults() == 1 {
n.Type = n.Left.Type.Results().Field(0).Type
} else {
n.Type = n.Left.Type.Results()
}
}
t := n.Left.Type
if n.List.Len() != 0 && n.List.First().Op == OAS {
break
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
walkexprlist(n.List.Slice(), init)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
ll := ascompatte(n, n.Isddd(), t.Params(), n.List.Slice(), 0, init)
n.List.Set(reorder1(ll))
case OCALLMETH:
t := n.Left.Type
if n.List.Len() != 0 && n.List.First().Op == OAS {
break
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
walkexprlist(n.List.Slice(), init)
ll := ascompatte(n, false, t.Recvs(), []*Node{n.Left.Left}, 0, init)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
lr := ascompatte(n, n.Isddd(), t.Params(), n.List.Slice(), 0, init)
ll = append(ll, lr...)
n.Left.Left = nil
updateHasCall(n.Left)
n.List.Set(reorder1(ll))
case OAS:
init.AppendNodes(&n.Ninit)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Left = safeexpr(n.Left, init)
if oaslit(n, init) {
break
}
cmd/compile: disable various write barrier optimizations Several of our current write barrier elision optimizations are invalid with the hybrid barrier. Eliding the hybrid barrier requires that *both* the current and new pointer be already shaded and, since we don't have the flow analysis to figure out anything about the slot's current value, for now we have to just disable several of these optimizations. This has a slight impact on binary size. On linux/amd64, the go tool binary increases by 0.7% and the compile binary increases by 1.5%. It also has a slight impact on performance, as one would expect. We'll win some of this back in subsequent commits. name old time/op new time/op delta BinaryTree17-12 2.38s ± 1% 2.40s ± 1% +0.82% (p=0.000 n=18+20) Fannkuch11-12 2.84s ± 1% 2.70s ± 0% -4.97% (p=0.000 n=18+18) FmtFprintfEmpty-12 44.2ns ± 1% 46.4ns ± 2% +4.89% (p=0.000 n=16+18) FmtFprintfString-12 131ns ± 0% 134ns ± 1% +2.05% (p=0.000 n=12+19) FmtFprintfInt-12 114ns ± 1% 117ns ± 1% +3.26% (p=0.000 n=19+20) FmtFprintfIntInt-12 176ns ± 1% 181ns ± 1% +3.25% (p=0.000 n=20+20) FmtFprintfPrefixedInt-12 185ns ± 1% 190ns ± 1% +2.77% (p=0.000 n=19+18) FmtFprintfFloat-12 249ns ± 1% 254ns ± 1% +1.71% (p=0.000 n=18+20) FmtManyArgs-12 747ns ± 1% 743ns ± 1% -0.58% (p=0.000 n=19+18) GobDecode-12 6.57ms ± 1% 6.61ms ± 0% +0.73% (p=0.000 n=19+20) GobEncode-12 5.58ms ± 1% 5.60ms ± 0% +0.27% (p=0.001 n=18+18) Gzip-12 223ms ± 1% 223ms ± 1% ~ (p=0.351 n=19+20) Gunzip-12 37.9ms ± 0% 37.9ms ± 1% ~ (p=0.095 n=16+20) HTTPClientServer-12 77.8µs ± 1% 78.5µs ± 1% +0.97% (p=0.000 n=19+20) JSONEncode-12 14.8ms ± 1% 14.8ms ± 1% ~ (p=0.079 n=20+19) JSONDecode-12 53.7ms ± 1% 54.2ms ± 1% +0.92% (p=0.000 n=20+19) Mandelbrot200-12 3.81ms ± 1% 3.81ms ± 0% ~ (p=0.916 n=19+18) GoParse-12 3.19ms ± 1% 3.19ms ± 1% ~ (p=0.175 n=20+19) RegexpMatchEasy0_32-12 71.9ns ± 1% 70.6ns ± 1% -1.87% (p=0.000 n=19+20) RegexpMatchEasy0_1K-12 946ns ± 0% 944ns ± 0% -0.22% (p=0.000 n=19+16) RegexpMatchEasy1_32-12 67.3ns ± 2% 66.8ns ± 1% -0.72% (p=0.008 n=20+20) RegexpMatchEasy1_1K-12 374ns ± 1% 384ns ± 1% +2.69% (p=0.000 n=18+20) RegexpMatchMedium_32-12 107ns ± 1% 107ns ± 1% ~ (p=1.000 n=20+20) RegexpMatchMedium_1K-12 34.3µs ± 1% 34.6µs ± 1% +0.90% (p=0.000 n=20+20) RegexpMatchHard_32-12 1.78µs ± 1% 1.80µs ± 1% +1.45% (p=0.000 n=20+19) RegexpMatchHard_1K-12 53.6µs ± 0% 54.5µs ± 1% +1.52% (p=0.000 n=19+18) Revcomp-12 417ms ± 5% 391ms ± 1% -6.42% (p=0.000 n=16+19) Template-12 61.1ms ± 1% 64.2ms ± 0% +5.07% (p=0.000 n=19+20) TimeParse-12 302ns ± 1% 305ns ± 1% +0.90% (p=0.000 n=18+18) TimeFormat-12 319ns ± 1% 315ns ± 1% -1.25% (p=0.000 n=18+18) [Geo mean] 54.0µs 54.3µs +0.58% name old time/op new time/op delta XGarbage-12 2.24ms ± 2% 2.28ms ± 1% +1.68% (p=0.000 n=18+17) XHTTP-12 11.4µs ± 1% 11.6µs ± 2% +1.63% (p=0.000 n=18+18) XJSON-12 11.6ms ± 0% 12.5ms ± 0% +7.84% (p=0.000 n=18+17) Updates #17503. Change-Id: I1899f8e35662971e24bf692b517dfbe2b533c00c Reviewed-on: https://go-review.googlesource.com/31572 Reviewed-by: Keith Randall <khr@golang.org>
2016-10-18 10:26:28 -04:00
if n.Right == nil {
// TODO(austin): Check all "implicit zeroing"
break
}
if !instrumenting && iszero(n.Right) {
break
}
switch n.Right.Op {
default:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = walkexpr(n.Right, init)
case ORECV:
// x = <-c; n.Left is x, n.Right.Left is c.
// orderstmt made sure x is addressable.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right.Left = walkexpr(n.Right.Left, init)
n1 := nod(OADDR, n.Left, nil)
r := n.Right.Left // the channel
n = mkcall1(chanfn("chanrecv1", 2, r.Type), nil, init, r, n1)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = walkexpr(n, init)
break opswitch
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
case OAPPEND:
// x = append(...)
r := n.Right
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if r.Type.Elem().NotInHeap() {
cmd/compile: add go:notinheap type pragma This adds a //go:notinheap pragma for declarations of types that must not be heap allocated. We ensure these rules by disallowing new(T), make([]T), append([]T), or implicit allocation of T, by disallowing conversions to notinheap types, and by propagating notinheap to any struct or array that contains notinheap elements. The utility of this pragma is that we can eliminate write barriers for writes to pointers to go:notinheap types, since the write barrier is guaranteed to be a no-op. This will let us mark several scheduler and memory allocator structures as go:notinheap, which will let us disallow write barriers in the scheduler and memory allocator much more thoroughly and also eliminate some problematic hybrid write barriers. This also makes go:nowritebarrierrec and go:yeswritebarrierrec much more powerful. Currently we use go:nowritebarrier all over the place, but it's almost never what you actually want: when write barriers are illegal, they're typically illegal for a whole dynamic scope. Partly this is because go:nowritebarrier has been around longer, but it's also because go:nowritebarrierrec couldn't be used in situations that had no-op write barriers or where some nested scope did allow write barriers. go:notinheap eliminates many no-op write barriers and go:yeswritebarrierrec makes it possible to opt back in to write barriers, so these two changes will let us use go:nowritebarrierrec far more liberally. This updates #13386, which is about controlling pointers from non-GC'd memory to GC'd memory. That would require some additional pragma (or pragmas), but could build on this pragma. Change-Id: I6314f8f4181535dd166887c9ec239977b54940bd Reviewed-on: https://go-review.googlesource.com/30939 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-10-11 22:53:27 -04:00
yyerror("%v is go:notinheap; heap allocation disallowed", r.Type.Elem())
}
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if r.Isddd() {
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
r = appendslice(r, init) // also works for append(slice, string).
} else {
r = walkappend(r, init, n)
}
n.Right = r
if r.Op == OAPPEND {
// Left in place for back end.
// Do not add a new write barrier.
// Set up address of type for back end.
r.Left = typename(r.Type.Elem())
break opswitch
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
}
// Otherwise, lowered for race detector.
// Treat as ordinary assignment.
}
if n.Left != nil && n.Right != nil {
n = convas(n, init)
}
case OAS2:
init.AppendNodes(&n.Ninit)
walkexprlistsafe(n.List.Slice(), init)
walkexprlistsafe(n.Rlist.Slice(), init)
ll := ascompatee(OAS, n.List.Slice(), n.Rlist.Slice(), init)
ll = reorder3(ll)
n = liststmt(ll)
// a,b,... = fn()
case OAS2FUNC:
init.AppendNodes(&n.Ninit)
r := n.Rlist.First()
walkexprlistsafe(n.List.Slice(), init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = walkexpr(r, init)
if isIntrinsicCall(r) {
n.Rlist.Set1(r)
break
}
init.Append(r)
ll := ascompatet(n.List, r.Type)
n = liststmt(ll)
// x, y = <-c
// orderstmt made sure x is addressable.
case OAS2RECV:
init.AppendNodes(&n.Ninit)
r := n.Rlist.First()
walkexprlistsafe(n.List.Slice(), init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r.Left = walkexpr(r.Left, init)
var n1 *Node
if isblank(n.List.First()) {
n1 = nodnil()
} else {
n1 = nod(OADDR, n.List.First(), nil)
}
n1.Etype = 1 // addr does not escape
fn := chanfn("chanrecv2", 2, r.Left.Type)
ok := n.List.Second()
call := mkcall1(fn, ok.Type, init, r.Left, n1)
n = nod(OAS, ok, call)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = typecheck(n, Etop)
// a,b = m[i]
case OAS2MAPR:
init.AppendNodes(&n.Ninit)
r := n.Rlist.First()
walkexprlistsafe(n.List.Slice(), init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r.Left = walkexpr(r.Left, init)
r.Right = walkexpr(r.Right, init)
t := r.Left.Type
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
fast := mapfast(t)
var key *Node
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
if fast != mapslow {
// fast versions take key by value
key = r.Right
} else {
// standard version takes key by reference
// orderexpr made sure key is addressable.
key = nod(OADDR, r.Right, nil)
}
// from:
// a,b = m[i]
// to:
// var,b = mapaccess2*(t, m, i)
// a = *var
a := n.List.First()
if w := t.Val().Width; w <= 1024 { // 1024 must match ../../../../runtime/hashmap.go:maxZero
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
fn := mapfn(mapaccess2[fast], t)
r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key)
} else {
fn := mapfn("mapaccess2_fat", t)
z := zeroaddr(w)
r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key, z)
}
// mapaccess2* returns a typed bool, but due to spec changes,
// the boolean result of i.(T) is now untyped so we make it the
// same type as the variable on the lhs.
if ok := n.List.Second(); !isblank(ok) && ok.Type.IsBoolean() {
r.Type.Field(1).Type = ok.Type
}
n.Rlist.Set1(r)
n.Op = OAS2FUNC
// don't generate a = *var if a is _
if !isblank(a) {
var_ := temp(types.NewPtr(t.Val()))
var_.SetTypecheck(1)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
var_.SetNonNil(true) // mapaccess always returns a non-nil pointer
n.List.SetFirst(var_)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = walkexpr(n, init)
init.Append(n)
n = nod(OAS, a, nod(OIND, var_, nil))
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = typecheck(n, Etop)
n = walkexpr(n, init)
case ODELETE:
init.AppendNodes(&n.Ninit)
map_ := n.List.First()
key := n.List.Second()
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
map_ = walkexpr(map_, init)
key = walkexpr(key, init)
t := map_.Type
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
fast := mapfast(t)
if fast == mapslow {
// orderstmt made sure key is addressable.
key = nod(OADDR, key, nil)
}
n = mkcall1(mapfndel(mapdelete[fast], t), nil, init, typename(t), map_, key)
case OAS2DOTTYPE:
walkexprlistsafe(n.List.Slice(), init)
n.Rlist.SetFirst(walkexpr(n.Rlist.First(), init))
case OCONVIFACE:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
// Optimize convT2E or convT2I as a two-word copy when T is pointer-shaped.
if isdirectiface(n.Left.Type) {
var t *Node
if n.Type.IsEmptyInterface() {
t = typename(n.Left.Type)
} else {
t = itabname(n.Left.Type, n.Type)
}
l := nod(OEFACE, t, n.Left)
l.Type = n.Type
l.SetTypecheck(n.Typecheck())
n = l
break
}
cmd/compile: convert constants to interfaces without allocating The order pass is responsible for ensuring that values passed to runtime functions, including convT2E/convT2I, are addressable. Prior to this CL, this was always accomplished by creating a temp, which frequently escaped to the heap, causing allocations, perhaps most notably in code like: fmt.Println(1, 2, 3) // allocates three times None of the runtime routines modify the contents of the pointers they receive, so in the case of constants, instead of creating a temp value, we can create a static value. (Marking the static value as read-only provides protection against accidental attempts by the runtime to modify the constant data.) This improves code generation for code like: panic("abc") c <- 2 // c is a chan int which can now simply refer to "abc" and 2, rather than going by way of a temporary. It also allows us to optimize convT2E/convT2I, by recognizing static readonly values and directly constructing the interface. This CL adds ~0.5% to binary size, despite decreasing the size of many functions, because it also adds many static symbols. This binary size regression could be recovered in future (but currently unplanned) work. There is a lot of content-duplication in these symbols; this statement generates six new symbols, three containing an int 1 and three containing a pointer to the string "a": fmt.Println(1, 1, 1, "a", "a", "a") These symbols could be made content-addressable. Furthermore, these symbols are small, so the alignment and naming overhead is large. As with the go.strings section, these symbols could be hidden and have their alignment reduced. The changes to test/live.go make it impossible (at least with current optimization techniques) to place the values being passed to the runtime in static symbols, preserving autotmp creation. Fixes #18704 Benchmarks from fmt and go-kit's logging package: github.com/go-kit/kit/log name old time/op new time/op delta JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10) JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10) Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9) OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10) TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9) TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10) LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9) LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10) NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10) NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10) ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10) ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10) JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10) Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10) TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10) TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10) LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10) ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10) JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10) Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10) ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) fmt name old time/op new time/op delta SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7) SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10) SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10) SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10) SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10) SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9) SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8) SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10) SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10) SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10) SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10) SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9) SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10) SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10) SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10) SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10) ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10) FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10) FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10) FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10) ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8) ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10) ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9) name old alloc/op new alloc/op delta SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfEmpty-8 0.00B 0.00B ~ (all equal) SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10) SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10) SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10) SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10) SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10) SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal) SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal) SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal) ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10) FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal) ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10) ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfEmpty-8 0.00 0.00 ~ (all equal) SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal) SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal) ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00 0.00 ~ (all equal) ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal) ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5 Reviewed-on: https://go-review.googlesource.com/35554 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
if staticbytes == nil {
staticbytes = newname(Runtimepkg.Lookup("staticbytes"))
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
staticbytes.SetClass(PEXTERN)
staticbytes.Type = types.NewArray(types.Types[TUINT8], 256)
zerobase = newname(Runtimepkg.Lookup("zerobase"))
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
zerobase.SetClass(PEXTERN)
zerobase.Type = types.Types[TUINTPTR]
}
// Optimize convT2{E,I} for many cases in which T is not pointer-shaped,
// by using an existing addressable value identical to n.Left
// or creating one on the stack.
cmd/compile: convert constants to interfaces without allocating The order pass is responsible for ensuring that values passed to runtime functions, including convT2E/convT2I, are addressable. Prior to this CL, this was always accomplished by creating a temp, which frequently escaped to the heap, causing allocations, perhaps most notably in code like: fmt.Println(1, 2, 3) // allocates three times None of the runtime routines modify the contents of the pointers they receive, so in the case of constants, instead of creating a temp value, we can create a static value. (Marking the static value as read-only provides protection against accidental attempts by the runtime to modify the constant data.) This improves code generation for code like: panic("abc") c <- 2 // c is a chan int which can now simply refer to "abc" and 2, rather than going by way of a temporary. It also allows us to optimize convT2E/convT2I, by recognizing static readonly values and directly constructing the interface. This CL adds ~0.5% to binary size, despite decreasing the size of many functions, because it also adds many static symbols. This binary size regression could be recovered in future (but currently unplanned) work. There is a lot of content-duplication in these symbols; this statement generates six new symbols, three containing an int 1 and three containing a pointer to the string "a": fmt.Println(1, 1, 1, "a", "a", "a") These symbols could be made content-addressable. Furthermore, these symbols are small, so the alignment and naming overhead is large. As with the go.strings section, these symbols could be hidden and have their alignment reduced. The changes to test/live.go make it impossible (at least with current optimization techniques) to place the values being passed to the runtime in static symbols, preserving autotmp creation. Fixes #18704 Benchmarks from fmt and go-kit's logging package: github.com/go-kit/kit/log name old time/op new time/op delta JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10) JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10) Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9) OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10) TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9) TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10) LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9) LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10) NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10) NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10) ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10) ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10) JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10) Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10) TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10) TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10) LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10) ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10) JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10) Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10) ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) fmt name old time/op new time/op delta SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7) SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10) SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10) SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10) SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10) SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9) SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8) SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10) SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10) SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10) SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10) SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9) SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10) SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10) SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10) SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10) ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10) FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10) FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10) FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10) ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8) ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10) ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9) name old alloc/op new alloc/op delta SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfEmpty-8 0.00B 0.00B ~ (all equal) SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10) SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10) SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10) SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10) SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10) SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal) SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal) SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal) ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10) FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal) ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10) ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfEmpty-8 0.00 0.00 ~ (all equal) SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal) SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal) ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00 0.00 ~ (all equal) ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal) ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5 Reviewed-on: https://go-review.googlesource.com/35554 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
var value *Node
switch {
case n.Left.Type.Size() == 0:
// n.Left is zero-sized. Use zerobase.
cheapexpr(n.Left, init) // Evaluate n.Left for side-effects. See issue 19246.
value = zerobase
case n.Left.Type.IsBoolean() || (n.Left.Type.Size() == 1 && n.Left.Type.IsInteger()):
// n.Left is a bool/byte. Use staticbytes[n.Left].
n.Left = cheapexpr(n.Left, init)
value = nod(OINDEX, staticbytes, byteindex(n.Left))
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
value.SetBounded(true)
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
case n.Left.Class() == PEXTERN && n.Left.Name != nil && n.Left.Name.Readonly():
// n.Left is a readonly global; use it directly.
value = n.Left
case !n.Left.Type.IsInterface() && n.Esc == EscNone && n.Left.Type.Width <= 1024:
// n.Left does not escape. Use a stack temporary initialized to n.Left.
value = temp(n.Left.Type)
init.Append(typecheck(nod(OAS, value, n.Left), Etop))
cmd/compile: convert constants to interfaces without allocating The order pass is responsible for ensuring that values passed to runtime functions, including convT2E/convT2I, are addressable. Prior to this CL, this was always accomplished by creating a temp, which frequently escaped to the heap, causing allocations, perhaps most notably in code like: fmt.Println(1, 2, 3) // allocates three times None of the runtime routines modify the contents of the pointers they receive, so in the case of constants, instead of creating a temp value, we can create a static value. (Marking the static value as read-only provides protection against accidental attempts by the runtime to modify the constant data.) This improves code generation for code like: panic("abc") c <- 2 // c is a chan int which can now simply refer to "abc" and 2, rather than going by way of a temporary. It also allows us to optimize convT2E/convT2I, by recognizing static readonly values and directly constructing the interface. This CL adds ~0.5% to binary size, despite decreasing the size of many functions, because it also adds many static symbols. This binary size regression could be recovered in future (but currently unplanned) work. There is a lot of content-duplication in these symbols; this statement generates six new symbols, three containing an int 1 and three containing a pointer to the string "a": fmt.Println(1, 1, 1, "a", "a", "a") These symbols could be made content-addressable. Furthermore, these symbols are small, so the alignment and naming overhead is large. As with the go.strings section, these symbols could be hidden and have their alignment reduced. The changes to test/live.go make it impossible (at least with current optimization techniques) to place the values being passed to the runtime in static symbols, preserving autotmp creation. Fixes #18704 Benchmarks from fmt and go-kit's logging package: github.com/go-kit/kit/log name old time/op new time/op delta JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10) JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10) Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9) OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10) TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9) TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10) LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9) LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10) NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10) NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10) ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10) ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10) JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10) Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10) TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10) TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10) LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10) ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10) JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10) Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10) ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) fmt name old time/op new time/op delta SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7) SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10) SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10) SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10) SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10) SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9) SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8) SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10) SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10) SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10) SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10) SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9) SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10) SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10) SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10) SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10) ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10) FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10) FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10) FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10) ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8) ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10) ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9) name old alloc/op new alloc/op delta SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfEmpty-8 0.00B 0.00B ~ (all equal) SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10) SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10) SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10) SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10) SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10) SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal) SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal) SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal) ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10) FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal) ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10) ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfEmpty-8 0.00 0.00 ~ (all equal) SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal) SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal) ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00 0.00 ~ (all equal) ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal) ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5 Reviewed-on: https://go-review.googlesource.com/35554 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
}
if value != nil {
// Value is identical to n.Left.
// Construct the interface directly: {type/itab, &value}.
var t *Node
if n.Type.IsEmptyInterface() {
t = typename(n.Left.Type)
} else {
t = itabname(n.Left.Type, n.Type)
}
cmd/compile: convert constants to interfaces without allocating The order pass is responsible for ensuring that values passed to runtime functions, including convT2E/convT2I, are addressable. Prior to this CL, this was always accomplished by creating a temp, which frequently escaped to the heap, causing allocations, perhaps most notably in code like: fmt.Println(1, 2, 3) // allocates three times None of the runtime routines modify the contents of the pointers they receive, so in the case of constants, instead of creating a temp value, we can create a static value. (Marking the static value as read-only provides protection against accidental attempts by the runtime to modify the constant data.) This improves code generation for code like: panic("abc") c <- 2 // c is a chan int which can now simply refer to "abc" and 2, rather than going by way of a temporary. It also allows us to optimize convT2E/convT2I, by recognizing static readonly values and directly constructing the interface. This CL adds ~0.5% to binary size, despite decreasing the size of many functions, because it also adds many static symbols. This binary size regression could be recovered in future (but currently unplanned) work. There is a lot of content-duplication in these symbols; this statement generates six new symbols, three containing an int 1 and three containing a pointer to the string "a": fmt.Println(1, 1, 1, "a", "a", "a") These symbols could be made content-addressable. Furthermore, these symbols are small, so the alignment and naming overhead is large. As with the go.strings section, these symbols could be hidden and have their alignment reduced. The changes to test/live.go make it impossible (at least with current optimization techniques) to place the values being passed to the runtime in static symbols, preserving autotmp creation. Fixes #18704 Benchmarks from fmt and go-kit's logging package: github.com/go-kit/kit/log name old time/op new time/op delta JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10) JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10) Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9) OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10) TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9) TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10) LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9) LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10) NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10) NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10) ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10) ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10) JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10) Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10) TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10) TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10) LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10) ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10) JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10) Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10) ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) fmt name old time/op new time/op delta SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7) SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10) SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10) SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10) SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10) SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9) SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8) SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10) SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10) SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10) SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10) SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9) SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10) SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10) SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10) SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10) ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10) FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10) FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10) FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10) ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8) ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10) ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9) name old alloc/op new alloc/op delta SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfEmpty-8 0.00B 0.00B ~ (all equal) SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10) SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10) SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10) SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10) SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10) SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal) SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal) SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal) ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10) FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal) ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10) ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfEmpty-8 0.00 0.00 ~ (all equal) SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal) SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal) ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00 0.00 ~ (all equal) ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal) ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5 Reviewed-on: https://go-review.googlesource.com/35554 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
l := nod(OEFACE, t, typecheck(nod(OADDR, value, nil), Erv))
l.Type = n.Type
l.SetTypecheck(n.Typecheck())
n = l
break
}
// Implement interface to empty interface conversion.
// tmp = i.itab
// if tmp != nil {
// tmp = tmp.type
// }
// e = iface{tmp, i.data}
if n.Type.IsEmptyInterface() && n.Left.Type.IsInterface() && !n.Left.Type.IsEmptyInterface() {
// Evaluate the input interface.
c := temp(n.Left.Type)
init.Append(nod(OAS, c, n.Left))
// Get the itab out of the interface.
tmp := temp(types.NewPtr(types.Types[TUINT8]))
init.Append(nod(OAS, tmp, typecheck(nod(OITAB, c, nil), Erv)))
// Get the type out of the itab.
nif := nod(OIF, typecheck(nod(ONE, tmp, nodnil()), Erv), nil)
nif.Nbody.Set1(nod(OAS, tmp, itabType(tmp)))
init.Append(nif)
// Build the result.
e := nod(OEFACE, tmp, ifaceData(c, types.NewPtr(types.Types[TUINT8])))
e.Type = n.Type // assign type manually, typecheck doesn't understand OEFACE.
e.SetTypecheck(1)
n = e
break
}
var ll []*Node
if n.Type.IsEmptyInterface() {
if !n.Left.Type.IsInterface() {
cmd/compile: optimize remaining convT2I calls See #14874 Updates #6853 This change adds a compiler optimization for non pointer shaped convT2I. Since itab symbols are now emitted by the compiler, the itab address can be passed directly to convT2I instead of passing the iface type and a cache pointer argument. Compilebench results for the 5-commits series ending here: name old time/op new time/op delta Template 336ms ± 4% 344ms ± 4% +2.61% (p=0.027 n=9+8) Unicode 165ms ± 6% 173ms ± 7% +5.11% (p=0.014 n=9+9) GoTypes 1.09s ± 1% 1.06s ± 2% -3.29% (p=0.000 n=9+9) Compiler 5.09s ±10% 4.75s ±10% -6.64% (p=0.011 n=10+10) MakeBash 31.1s ± 5% 30.3s ± 3% ~ (p=0.089 n=10+10) name old text-bytes new text-bytes delta HelloSize 558k ± 0% 558k ± 0% +0.02% (p=0.000 n=10+10) CmdGoSize 6.24M ± 0% 6.11M ± 0% -2.11% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 3.66k ± 0% 3.74k ± 0% +2.41% (p=0.000 n=10+10) CmdGoSize 134k ± 0% 162k ± 0% +20.76% (p=0.000 n=10+10) name old bss-bytes new bss-bytes delta HelloSize 126k ± 0% 126k ± 0% ~ (all samples are equal) CmdGoSize 149k ± 0% 146k ± 0% -2.17% (p=0.000 n=10+10) name old exe-bytes new exe-bytes delta HelloSize 924k ± 0% 924k ± 0% +0.05% (p=0.000 n=10+10) CmdGoSize 9.77M ± 0% 9.62M ± 0% -1.47% (p=0.000 n=10+10) Change-Id: Ib230ddc04988824035c32287ae544a965fedd344 Reviewed-on: https://go-review.googlesource.com/20902 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Michel Lespinasse <walken@google.com>
2016-03-18 17:21:33 -07:00
ll = append(ll, typename(n.Left.Type))
}
cmd/compile: optimize remaining convT2I calls See #14874 Updates #6853 This change adds a compiler optimization for non pointer shaped convT2I. Since itab symbols are now emitted by the compiler, the itab address can be passed directly to convT2I instead of passing the iface type and a cache pointer argument. Compilebench results for the 5-commits series ending here: name old time/op new time/op delta Template 336ms ± 4% 344ms ± 4% +2.61% (p=0.027 n=9+8) Unicode 165ms ± 6% 173ms ± 7% +5.11% (p=0.014 n=9+9) GoTypes 1.09s ± 1% 1.06s ± 2% -3.29% (p=0.000 n=9+9) Compiler 5.09s ±10% 4.75s ±10% -6.64% (p=0.011 n=10+10) MakeBash 31.1s ± 5% 30.3s ± 3% ~ (p=0.089 n=10+10) name old text-bytes new text-bytes delta HelloSize 558k ± 0% 558k ± 0% +0.02% (p=0.000 n=10+10) CmdGoSize 6.24M ± 0% 6.11M ± 0% -2.11% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 3.66k ± 0% 3.74k ± 0% +2.41% (p=0.000 n=10+10) CmdGoSize 134k ± 0% 162k ± 0% +20.76% (p=0.000 n=10+10) name old bss-bytes new bss-bytes delta HelloSize 126k ± 0% 126k ± 0% ~ (all samples are equal) CmdGoSize 149k ± 0% 146k ± 0% -2.17% (p=0.000 n=10+10) name old exe-bytes new exe-bytes delta HelloSize 924k ± 0% 924k ± 0% +0.05% (p=0.000 n=10+10) CmdGoSize 9.77M ± 0% 9.62M ± 0% -1.47% (p=0.000 n=10+10) Change-Id: Ib230ddc04988824035c32287ae544a965fedd344 Reviewed-on: https://go-review.googlesource.com/20902 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Michel Lespinasse <walken@google.com>
2016-03-18 17:21:33 -07:00
} else {
if n.Left.Type.IsInterface() {
cmd/compile: optimize remaining convT2I calls See #14874 Updates #6853 This change adds a compiler optimization for non pointer shaped convT2I. Since itab symbols are now emitted by the compiler, the itab address can be passed directly to convT2I instead of passing the iface type and a cache pointer argument. Compilebench results for the 5-commits series ending here: name old time/op new time/op delta Template 336ms ± 4% 344ms ± 4% +2.61% (p=0.027 n=9+8) Unicode 165ms ± 6% 173ms ± 7% +5.11% (p=0.014 n=9+9) GoTypes 1.09s ± 1% 1.06s ± 2% -3.29% (p=0.000 n=9+9) Compiler 5.09s ±10% 4.75s ±10% -6.64% (p=0.011 n=10+10) MakeBash 31.1s ± 5% 30.3s ± 3% ~ (p=0.089 n=10+10) name old text-bytes new text-bytes delta HelloSize 558k ± 0% 558k ± 0% +0.02% (p=0.000 n=10+10) CmdGoSize 6.24M ± 0% 6.11M ± 0% -2.11% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 3.66k ± 0% 3.74k ± 0% +2.41% (p=0.000 n=10+10) CmdGoSize 134k ± 0% 162k ± 0% +20.76% (p=0.000 n=10+10) name old bss-bytes new bss-bytes delta HelloSize 126k ± 0% 126k ± 0% ~ (all samples are equal) CmdGoSize 149k ± 0% 146k ± 0% -2.17% (p=0.000 n=10+10) name old exe-bytes new exe-bytes delta HelloSize 924k ± 0% 924k ± 0% +0.05% (p=0.000 n=10+10) CmdGoSize 9.77M ± 0% 9.62M ± 0% -1.47% (p=0.000 n=10+10) Change-Id: Ib230ddc04988824035c32287ae544a965fedd344 Reviewed-on: https://go-review.googlesource.com/20902 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Michel Lespinasse <walken@google.com>
2016-03-18 17:21:33 -07:00
ll = append(ll, typename(n.Type))
} else {
ll = append(ll, itabname(n.Left.Type, n.Type))
}
}
if n.Left.Type.IsInterface() {
ll = append(ll, n.Left)
} else {
// regular types are passed by reference to avoid C vararg calls
// orderexpr arranged for n.Left to be a temporary for all
// the conversions it could see. comparison of an interface
// with a non-interface, especially in a switch on interface value
// with non-interface cases, is not visible to orderstmt, so we
// have to fall back on allocating a temp here.
if islvalue(n.Left) {
ll = append(ll, nod(OADDR, n.Left, nil))
} else {
ll = append(ll, nod(OADDR, copyexpr(n.Left, n.Left.Type, init), nil))
}
dowidth(n.Left.Type)
}
fn := syslook(convFuncName(n.Left.Type, n.Type))
fn = substArgTypes(fn, n.Left.Type, n.Type)
dowidth(fn.Type)
n = nod(OCALL, fn, nil)
n.List.Set(ll)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = typecheck(n, Erv)
n = walkexpr(n, init)
case OCONV, OCONVNOP:
if thearch.LinkArch.Family == sys.ARM || thearch.LinkArch.Family == sys.MIPS {
if n.Left.Type.IsFloat() {
if n.Type.Etype == TINT64 {
n = mkcall("float64toint64", n.Type, init, conv(n.Left, types.Types[TFLOAT64]))
break
}
if n.Type.Etype == TUINT64 {
n = mkcall("float64touint64", n.Type, init, conv(n.Left, types.Types[TFLOAT64]))
break
}
}
if n.Type.IsFloat() {
if n.Left.Type.Etype == TINT64 {
n = conv(mkcall("int64tofloat64", types.Types[TFLOAT64], init, conv(n.Left, types.Types[TINT64])), n.Type)
break
}
if n.Left.Type.Etype == TUINT64 {
n = conv(mkcall("uint64tofloat64", types.Types[TFLOAT64], init, conv(n.Left, types.Types[TUINT64])), n.Type)
break
}
}
}
if thearch.LinkArch.Family == sys.I386 {
if n.Left.Type.IsFloat() {
if n.Type.Etype == TINT64 {
n = mkcall("float64toint64", n.Type, init, conv(n.Left, types.Types[TFLOAT64]))
break
}
if n.Type.Etype == TUINT64 {
n = mkcall("float64touint64", n.Type, init, conv(n.Left, types.Types[TFLOAT64]))
break
}
if n.Type.Etype == TUINT32 || n.Type.Etype == TUINT || n.Type.Etype == TUINTPTR {
n = mkcall("float64touint32", n.Type, init, conv(n.Left, types.Types[TFLOAT64]))
break
}
}
if n.Type.IsFloat() {
if n.Left.Type.Etype == TINT64 {
n = conv(mkcall("int64tofloat64", types.Types[TFLOAT64], init, conv(n.Left, types.Types[TINT64])), n.Type)
break
}
if n.Left.Type.Etype == TUINT64 {
n = conv(mkcall("uint64tofloat64", types.Types[TFLOAT64], init, conv(n.Left, types.Types[TUINT64])), n.Type)
break
}
if n.Left.Type.Etype == TUINT32 || n.Left.Type.Etype == TUINT || n.Left.Type.Etype == TUINTPTR {
n = conv(mkcall("uint32tofloat64", types.Types[TFLOAT64], init, conv(n.Left, types.Types[TUINT32])), n.Type)
break
}
}
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
case OANDNOT:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Op = OAND
n.Right = nod(OCOM, n.Right, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = typecheck(n.Right, Erv)
n.Right = walkexpr(n.Right, init)
case ODIV, OMOD:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
// rewrite complex div into function call.
et := n.Left.Type.Etype
if isComplex[et] && n.Op == ODIV {
t := n.Type
n = mkcall("complex128div", types.Types[TCOMPLEX128], init, conv(n.Left, types.Types[TCOMPLEX128]), conv(n.Right, types.Types[TCOMPLEX128]))
n = conv(n, t)
break
}
// Nothing to do for float divisions.
if isFloat[et] {
break
}
// rewrite 64-bit div and mod on 32-bit architectures.
// TODO: Remove this code once we can introduce
// runtime calls late in SSA processing.
if Widthreg < 8 && (et == TINT64 || et == TUINT64) {
if n.Right.Op == OLITERAL {
// Leave div/mod by constant powers of 2.
// The SSA backend will handle those.
switch et {
case TINT64:
c := n.Right.Int64()
if c < 0 {
c = -c
}
if c != 0 && c&(c-1) == 0 {
break opswitch
}
case TUINT64:
c := uint64(n.Right.Int64())
if c != 0 && c&(c-1) == 0 {
break opswitch
}
}
}
var fn string
if et == TINT64 {
fn = "int64"
} else {
fn = "uint64"
}
if n.Op == ODIV {
fn += "div"
} else {
fn += "mod"
}
n = mkcall(fn, n.Type, init, conv(n.Left, types.Types[et]), conv(n.Right, types.Types[et]))
}
case OINDEX:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
// save the original node for bounds checking elision.
// If it was a ODIV/OMOD walk might rewrite it.
r := n.Right
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = walkexpr(n.Right, init)
// if range of type cannot exceed static array bound,
// disable bounds check.
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if n.Bounded() {
break
}
t := n.Left.Type
if t != nil && t.IsPtr() {
t = t.Elem()
}
if t.IsArray() {
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.SetBounded(bounded(r, t.NumElem()))
if Debug['m'] != 0 && n.Bounded() && !Isconst(n.Right, CTINT) {
Warn("index bounds check elided")
}
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if smallintconst(n.Right) && !n.Bounded() {
yyerror("index out of bounds")
}
} else if Isconst(n.Left, CTSTR) {
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.SetBounded(bounded(r, int64(len(n.Left.Val().U.(string)))))
if Debug['m'] != 0 && n.Bounded() && !Isconst(n.Right, CTINT) {
Warn("index bounds check elided")
}
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if smallintconst(n.Right) && !n.Bounded() {
yyerror("index out of bounds")
}
}
if Isconst(n.Right, CTINT) {
if n.Right.Val().U.(*Mpint).CmpInt64(0) < 0 || n.Right.Val().U.(*Mpint).Cmp(maxintval[TINT]) > 0 {
yyerror("index out of bounds")
}
}
case OINDEXMAP:
// Replace m[k] with *map{access1,assign}(maptype, m, &k)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
map_ := n.Left
key := n.Right
t := map_.Type
if n.Etype == 1 {
// This m[k] expression is on the left-hand side of an assignment.
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
fast := mapfast(t)
if fast == mapslow {
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
// standard version takes key by reference.
// orderexpr made sure key is addressable.
key = nod(OADDR, key, nil)
}
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
n = mkcall1(mapfn(mapassign[fast], t), nil, init, typename(t), map_, key)
} else {
// m[k] is not the target of an assignment.
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
fast := mapfast(t)
if fast == mapslow {
// standard version takes key by reference.
// orderexpr made sure key is addressable.
key = nod(OADDR, key, nil)
}
if w := t.Val().Width; w <= 1024 { // 1024 must match ../../../../runtime/hashmap.go:maxZero
n = mkcall1(mapfn(mapaccess1[fast], t), types.NewPtr(t.Val()), init, typename(t), map_, key)
} else {
z := zeroaddr(w)
n = mkcall1(mapfn("mapaccess1_fat", t), types.NewPtr(t.Val()), init, typename(t), map_, key, z)
}
}
n.Type = types.NewPtr(t.Val())
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
n.SetNonNil(true) // mapaccess1* and mapassign always return non-nil pointers.
n = nod(OIND, n, nil)
n.Type = t.Val()
n.SetTypecheck(1)
case ORECV:
Fatalf("walkexpr ORECV") // should see inside OAS only
case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
low, high, max := n.SliceBounds()
low = walkexpr(low, init)
if low != nil && iszero(low) {
// Reduce x[0:j] to x[:j] and x[0:j:k] to x[:j:k].
low = nil
}
high = walkexpr(high, init)
max = walkexpr(max, init)
n.SetSliceBounds(low, high, max)
if n.Op.IsSlice3() {
if max != nil && max.Op == OCAP && samesafeexpr(n.Left, max.Left) {
// Reduce x[i:j:cap(x)] to x[i:j].
if n.Op == OSLICE3 {
n.Op = OSLICE
} else {
n.Op = OSLICEARR
}
n = reduceSlice(n)
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
}
} else {
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
n = reduceSlice(n)
}
case ONEW:
if n.Esc == EscNone {
if n.Type.Elem().Width >= 1<<16 {
Fatalf("large ONEW with EscNone: %v", n)
}
r := temp(n.Type.Elem())
r = nod(OAS, r, nil) // zero temp
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Etop)
init.Append(r)
r = nod(OADDR, r.Left, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
n = r
} else {
n = callnew(n.Type.Elem())
}
case OCMPSTR:
// s + "badgerbadgerbadger" == "badgerbadgerbadger"
if (Op(n.Etype) == OEQ || Op(n.Etype) == ONE) && Isconst(n.Right, CTSTR) && n.Left.Op == OADDSTR && n.Left.List.Len() == 2 && Isconst(n.Left.List.Second(), CTSTR) && strlit(n.Right) == strlit(n.Left.List.Second()) {
// TODO(marvin): Fix Node.EType type union.
r := nod(Op(n.Etype), nod(OLEN, n.Left.List.First(), nil), nodintconst(0))
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
r = walkexpr(r, init)
r.Type = n.Type
n = r
break
}
cmd/compile: unroll comparisons to short constant strings Unroll s == "ab" to len(s) == 2 && s[0] == 'a' && s[1] == 'b' This generates faster and shorter code by avoiding a runtime call. Do something similar for !=. The cutoff length is 6. This was chosen empirically by examining binary sizes on arm, arm64, 386, and amd64 using the SSA backend. For all architectures examined, 4, 5, and 6 were the ideal cutoff, with identical binary sizes. The distribution of constant string equality sizes during 'go build -a std' is: 40.81% 622 len 0 14.11% 215 len 4 9.45% 144 len 1 7.81% 119 len 3 7.48% 114 len 5 5.12% 78 len 7 4.13% 63 len 2 3.54% 54 len 8 2.69% 41 len 6 1.18% 18 len 10 0.85% 13 len 9 0.66% 10 len 14 0.59% 9 len 17 0.46% 7 len 11 0.26% 4 len 12 0.20% 3 len 19 0.13% 2 len 13 0.13% 2 len 15 0.13% 2 len 16 0.07% 1 len 20 0.07% 1 len 23 0.07% 1 len 33 0.07% 1 len 36 A cutoff of length 6 covers most of the cases. Benchmarks on amd64 comparing a string to a constant of length 3: Cmp/1same-8 4.78ns ± 6% 0.94ns ± 9% -80.26% (p=0.000 n=20+20) Cmp/1diffbytes-8 6.43ns ± 6% 0.96ns ±11% -85.13% (p=0.000 n=20+20) Cmp/3same-8 4.71ns ± 5% 1.28ns ± 5% -72.90% (p=0.000 n=20+20) Cmp/3difffirstbyte-8 6.33ns ± 7% 1.27ns ± 7% -79.90% (p=0.000 n=20+20) Cmp/3difflastbyte-8 6.34ns ± 8% 1.26ns ± 9% -80.13% (p=0.000 n=20+20) The change to the prove test preserves the existing intent of the test. When the string was short, there was a new "proved in bounds" report that referred to individual byte comparisons. Change-Id: I593ac303b0d11f275672090c5c786ea0c6b8da13 Reviewed-on: https://go-review.googlesource.com/26758 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-06-25 13:40:43 -07:00
// Rewrite comparisons to short constant strings as length+byte-wise comparisons.
var cs, ncs *Node // const string, non-const string
switch {
case Isconst(n.Left, CTSTR) && Isconst(n.Right, CTSTR):
// ignore; will be constant evaluated
case Isconst(n.Left, CTSTR):
cs = n.Left
ncs = n.Right
case Isconst(n.Right, CTSTR):
cs = n.Right
ncs = n.Left
}
if cs != nil {
cmp := Op(n.Etype)
// maxRewriteLen was chosen empirically.
// It is the value that minimizes cmd/go file size
// across most architectures.
// See the commit description for CL 26758 for details.
maxRewriteLen := 6
// Some architectures can load unaligned byte sequence as 1 word.
// So we can cover longer strings with the same amount of code.
canCombineLoads := false
combine64bit := false
// TODO: does this improve performance on any other architectures?
switch thearch.LinkArch.Family {
case sys.AMD64:
// Larger compare require longer instructions, so keep this reasonably low.
// Data from CL 26758 shows that longer strings are rare.
// If we really want we can do 16 byte SSE comparisons in the future.
maxRewriteLen = 16
canCombineLoads = true
combine64bit = true
case sys.I386:
maxRewriteLen = 8
canCombineLoads = true
}
cmd/compile: unroll comparisons to short constant strings Unroll s == "ab" to len(s) == 2 && s[0] == 'a' && s[1] == 'b' This generates faster and shorter code by avoiding a runtime call. Do something similar for !=. The cutoff length is 6. This was chosen empirically by examining binary sizes on arm, arm64, 386, and amd64 using the SSA backend. For all architectures examined, 4, 5, and 6 were the ideal cutoff, with identical binary sizes. The distribution of constant string equality sizes during 'go build -a std' is: 40.81% 622 len 0 14.11% 215 len 4 9.45% 144 len 1 7.81% 119 len 3 7.48% 114 len 5 5.12% 78 len 7 4.13% 63 len 2 3.54% 54 len 8 2.69% 41 len 6 1.18% 18 len 10 0.85% 13 len 9 0.66% 10 len 14 0.59% 9 len 17 0.46% 7 len 11 0.26% 4 len 12 0.20% 3 len 19 0.13% 2 len 13 0.13% 2 len 15 0.13% 2 len 16 0.07% 1 len 20 0.07% 1 len 23 0.07% 1 len 33 0.07% 1 len 36 A cutoff of length 6 covers most of the cases. Benchmarks on amd64 comparing a string to a constant of length 3: Cmp/1same-8 4.78ns ± 6% 0.94ns ± 9% -80.26% (p=0.000 n=20+20) Cmp/1diffbytes-8 6.43ns ± 6% 0.96ns ±11% -85.13% (p=0.000 n=20+20) Cmp/3same-8 4.71ns ± 5% 1.28ns ± 5% -72.90% (p=0.000 n=20+20) Cmp/3difffirstbyte-8 6.33ns ± 7% 1.27ns ± 7% -79.90% (p=0.000 n=20+20) Cmp/3difflastbyte-8 6.34ns ± 8% 1.26ns ± 9% -80.13% (p=0.000 n=20+20) The change to the prove test preserves the existing intent of the test. When the string was short, there was a new "proved in bounds" report that referred to individual byte comparisons. Change-Id: I593ac303b0d11f275672090c5c786ea0c6b8da13 Reviewed-on: https://go-review.googlesource.com/26758 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-06-25 13:40:43 -07:00
var and Op
switch cmp {
case OEQ:
and = OANDAND
case ONE:
and = OOROR
default:
// Don't do byte-wise comparisons for <, <=, etc.
// They're fairly complicated.
// Length-only checks are ok, though.
maxRewriteLen = 0
}
if s := cs.Val().U.(string); len(s) <= maxRewriteLen {
if len(s) > 0 {
ncs = safeexpr(ncs, init)
}
// TODO(marvin): Fix Node.EType type union.
r := nod(cmp, nod(OLEN, ncs, nil), nodintconst(int64(len(s))))
remains := len(s)
for i := 0; remains > 0; {
if remains == 1 || !canCombineLoads {
cb := nodintconst(int64(s[i]))
ncb := nod(OINDEX, ncs, nodintconst(int64(i)))
r = nod(and, r, nod(cmp, ncb, cb))
remains--
i++
continue
}
var step int
var convType *types.Type
switch {
case remains >= 8 && combine64bit:
convType = types.Types[TINT64]
step = 8
case remains >= 4:
convType = types.Types[TUINT32]
step = 4
case remains >= 2:
convType = types.Types[TUINT16]
step = 2
}
ncsubstr := nod(OINDEX, ncs, nodintconst(int64(i)))
ncsubstr = conv(ncsubstr, convType)
csubstr := int64(s[i])
// Calculate large constant from bytes as sequence of shifts and ors.
// Like this: uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 ...
// ssa will combine this into a single large load.
for offset := 1; offset < step; offset++ {
b := nod(OINDEX, ncs, nodintconst(int64(i+offset)))
b = conv(b, convType)
b = nod(OLSH, b, nodintconst(int64(8*offset)))
ncsubstr = nod(OOR, ncsubstr, b)
csubstr = csubstr | int64(s[i+offset])<<uint8(8*offset)
}
csubstrPart := nodintconst(csubstr)
// Compare "step" bytes as once
r = nod(and, r, nod(cmp, csubstrPart, ncsubstr))
remains -= step
i += step
cmd/compile: unroll comparisons to short constant strings Unroll s == "ab" to len(s) == 2 && s[0] == 'a' && s[1] == 'b' This generates faster and shorter code by avoiding a runtime call. Do something similar for !=. The cutoff length is 6. This was chosen empirically by examining binary sizes on arm, arm64, 386, and amd64 using the SSA backend. For all architectures examined, 4, 5, and 6 were the ideal cutoff, with identical binary sizes. The distribution of constant string equality sizes during 'go build -a std' is: 40.81% 622 len 0 14.11% 215 len 4 9.45% 144 len 1 7.81% 119 len 3 7.48% 114 len 5 5.12% 78 len 7 4.13% 63 len 2 3.54% 54 len 8 2.69% 41 len 6 1.18% 18 len 10 0.85% 13 len 9 0.66% 10 len 14 0.59% 9 len 17 0.46% 7 len 11 0.26% 4 len 12 0.20% 3 len 19 0.13% 2 len 13 0.13% 2 len 15 0.13% 2 len 16 0.07% 1 len 20 0.07% 1 len 23 0.07% 1 len 33 0.07% 1 len 36 A cutoff of length 6 covers most of the cases. Benchmarks on amd64 comparing a string to a constant of length 3: Cmp/1same-8 4.78ns ± 6% 0.94ns ± 9% -80.26% (p=0.000 n=20+20) Cmp/1diffbytes-8 6.43ns ± 6% 0.96ns ±11% -85.13% (p=0.000 n=20+20) Cmp/3same-8 4.71ns ± 5% 1.28ns ± 5% -72.90% (p=0.000 n=20+20) Cmp/3difffirstbyte-8 6.33ns ± 7% 1.27ns ± 7% -79.90% (p=0.000 n=20+20) Cmp/3difflastbyte-8 6.34ns ± 8% 1.26ns ± 9% -80.13% (p=0.000 n=20+20) The change to the prove test preserves the existing intent of the test. When the string was short, there was a new "proved in bounds" report that referred to individual byte comparisons. Change-Id: I593ac303b0d11f275672090c5c786ea0c6b8da13 Reviewed-on: https://go-review.googlesource.com/26758 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-06-25 13:40:43 -07:00
}
r = typecheck(r, Erv)
r = walkexpr(r, init)
r.Type = n.Type
n = r
break
}
}
var r *Node
// TODO(marvin): Fix Node.EType type union.
if Op(n.Etype) == OEQ || Op(n.Etype) == ONE {
// prepare for rewrite below
n.Left = cheapexpr(n.Left, init)
n.Right = cheapexpr(n.Right, init)
lstr := conv(n.Left, types.Types[TSTRING])
rstr := conv(n.Right, types.Types[TSTRING])
lptr := nod(OSPTR, lstr, nil)
rptr := nod(OSPTR, rstr, nil)
llen := conv(nod(OLEN, lstr, nil), types.Types[TUINTPTR])
rlen := conv(nod(OLEN, rstr, nil), types.Types[TUINTPTR])
fn := syslook("memequal")
fn = substArgTypes(fn, types.Types[TUINT8], types.Types[TUINT8])
r = mkcall1(fn, types.Types[TBOOL], init, lptr, rptr, llen)
// quick check of len before full compare for == or !=.
// memequal then tests equality up to length len.
// TODO(marvin): Fix Node.EType type union.
if Op(n.Etype) == OEQ {
// len(left) == len(right) && memequal(left, right, len)
r = nod(OANDAND, nod(OEQ, llen, rlen), r)
} else {
// len(left) != len(right) || !memequal(left, right, len)
r = nod(ONOT, r, nil)
r = nod(OOROR, nod(ONE, llen, rlen), r)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
r = walkexpr(r, nil)
} else {
// sys_cmpstring(s1, s2) :: 0
r = mkcall("cmpstring", types.Types[TINT], init, conv(n.Left, types.Types[TSTRING]), conv(n.Right, types.Types[TSTRING]))
// TODO(marvin): Fix Node.EType type union.
r = nod(Op(n.Etype), r, nodintconst(0))
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
if !n.Type.IsBoolean() {
Fatalf("cmp %v", n.Type)
}
r.Type = n.Type
n = r
case OADDSTR:
n = addstr(n, init)
case OAPPEND:
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
// order should make sure we only see OAS(node, OAPPEND), which we handle above.
Fatalf("append outside assignment")
case OCOPY:
n = copyany(n, init, instrumenting && !compiling_runtime)
// cannot use chanfn - closechan takes any, not chan any
case OCLOSE:
fn := syslook("closechan")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = substArgTypes(fn, n.Left.Type)
n = mkcall1(fn, nil, init, n.Left)
case OMAKECHAN:
cmd/compile: generate makechan calls with int arguments Where possible generate calls to runtime makechan with int arguments during compile time instead of makechan with int64 arguments. This eliminates converting arguments for calls to makechan with int64 arguments for platforms where int64 values do not fit into arguments of type int. A similar optimization for makeslice was introduced in CL golang.org/cl/27851. 386: name old time/op new time/op delta MakeChan/Byte 52.4ns ± 6% 45.0ns ± 1% -14.14% (p=0.000 n=10+10) MakeChan/Int 54.5ns ± 1% 49.1ns ± 1% -9.87% (p=0.000 n=10+10) MakeChan/Ptr 150ns ± 1% 143ns ± 0% -4.38% (p=0.000 n=9+7) MakeChan/Struct/0 49.2ns ± 2% 43.2ns ± 2% -12.27% (p=0.000 n=10+10) MakeChan/Struct/32 81.7ns ± 2% 76.2ns ± 1% -6.71% (p=0.000 n=10+10) MakeChan/Struct/40 88.4ns ± 2% 82.5ns ± 2% -6.60% (p=0.000 n=10+10) AMD64: name old time/op new time/op delta MakeChan/Byte 83.4ns ± 8% 80.8ns ± 3% ~ (p=0.171 n=10+10) MakeChan/Int 101ns ± 3% 101ns ± 2% ~ (p=0.412 n=10+10) MakeChan/Ptr 128ns ± 1% 128ns ± 1% ~ (p=0.191 n=10+10) MakeChan/Struct/0 67.6ns ± 3% 68.7ns ± 4% ~ (p=0.224 n=10+10) MakeChan/Struct/32 138ns ± 1% 139ns ± 1% ~ (p=0.185 n=10+9) MakeChan/Struct/40 154ns ± 1% 154ns ± 1% -0.55% (p=0.027 n=10+9) Change-Id: Ie854cb066007232c5e9f71ea7d6fe27e81a9c050 Reviewed-on: https://go-review.googlesource.com/55140 Run-TryBot: Martin Möhrmann <moehrmann@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-08-13 20:03:02 +02:00
// When size fits into int, use makechan instead of
// makechan64, which is faster and shorter on 32 bit platforms.
size := n.Left
fnname := "makechan64"
argtype := types.Types[TINT64]
// Type checking guarantees that TIDEAL size is positive and fits in an int.
// The case of size overflow when converting TUINT or TUINTPTR to TINT
// will be handled by the negative range checks in makechan during runtime.
if size.Type.IsKind(TIDEAL) || maxintval[size.Type.Etype].Cmp(maxintval[TUINT]) <= 0 {
fnname = "makechan"
argtype = types.Types[TINT]
}
n = mkcall1(chanfn(fnname, 1, n.Type), n.Type, init, typename(n.Type), conv(size, argtype))
case OMAKEMAP:
t := n.Type
hmapType := hmap(t)
hint := n.Left
// var h *hmap
var h *Node
if n.Esc == EscNone {
// Allocate hmap on stack.
// var hv hmap
hv := temp(hmapType)
zero := nod(OAS, hv, nil)
zero = typecheck(zero, Etop)
init.Append(zero)
// h = &hv
h = nod(OADDR, hv, nil)
// Allocate one bucket pointed to by hmap.buckets on stack if hint
// is not larger than BUCKETSIZE. In case hint is larger than
// BUCKETSIZE runtime.makemap will allocate the buckets on the heap.
// Maximum key and value size is 128 bytes, larger objects
// are stored with an indirection. So max bucket size is 2048+eps.
if !Isconst(hint, CTINT) ||
!(hint.Val().U.(*Mpint).CmpInt64(BUCKETSIZE) > 0) {
// var bv bmap
bv := temp(bmap(t))
zero = nod(OAS, bv, nil)
zero = typecheck(zero, Etop)
init.Append(zero)
// b = &bv
b := nod(OADDR, bv, nil)
// h.buckets = b
bsym := hmapType.Field(5).Sym // hmap.buckets see reflect.go:hmap
na := nod(OAS, nodSym(ODOT, h, bsym), b)
na = typecheck(na, Etop)
init.Append(na)
}
} else {
// h = nil
h = nodnil()
}
// When hint fits into int, use makemap instead of
// makemap64, which is faster and shorter on 32 bit platforms.
fnname := "makemap64"
argtype := types.Types[TINT64]
// Type checking guarantees that TIDEAL hint is positive and fits in an int.
// See checkmake call in TMAP case of OMAKE case in OpSwitch in typecheck1 function.
// The case of hint overflow when converting TUINT or TUINTPTR to TINT
// will be handled by the negative range checks in makemap during runtime.
if hint.Type.IsKind(TIDEAL) || maxintval[hint.Type.Etype].Cmp(maxintval[TUINT]) <= 0 {
fnname = "makemap"
argtype = types.Types[TINT]
}
fn := syslook(fnname)
fn = substArgTypes(fn, hmapType, t.Key(), t.Val())
n = mkcall1(fn, n.Type, init, typename(n.Type), conv(hint, argtype), h)
case OMAKESLICE:
l := n.Left
r := n.Right
if r == nil {
r = safeexpr(l, init)
l = r
}
t := n.Type
if n.Esc == EscNone {
if !isSmallMakeSlice(n) {
Fatalf("non-small OMAKESLICE with EscNone: %v", n)
}
// var arr [r]T
// n = arr[:l]
t = types.NewArray(t.Elem(), nonnegintconst(r)) // [r]T
var_ := temp(t)
a := nod(OAS, var_, nil) // zero temp
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
init.Append(a)
r := nod(OSLICE, var_, nil) // arr[:l]
r.SetSliceBounds(nil, l, nil)
r = conv(r, n.Type) // in case n.Type is named.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
r = walkexpr(r, init)
n = r
} else {
// n escapes; set up a call to makeslice.
// When len and cap can fit into int, use makeslice instead of
// makeslice64, which is faster and shorter on 32 bit platforms.
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if t.Elem().NotInHeap() {
cmd/compile: add go:notinheap type pragma This adds a //go:notinheap pragma for declarations of types that must not be heap allocated. We ensure these rules by disallowing new(T), make([]T), append([]T), or implicit allocation of T, by disallowing conversions to notinheap types, and by propagating notinheap to any struct or array that contains notinheap elements. The utility of this pragma is that we can eliminate write barriers for writes to pointers to go:notinheap types, since the write barrier is guaranteed to be a no-op. This will let us mark several scheduler and memory allocator structures as go:notinheap, which will let us disallow write barriers in the scheduler and memory allocator much more thoroughly and also eliminate some problematic hybrid write barriers. This also makes go:nowritebarrierrec and go:yeswritebarrierrec much more powerful. Currently we use go:nowritebarrier all over the place, but it's almost never what you actually want: when write barriers are illegal, they're typically illegal for a whole dynamic scope. Partly this is because go:nowritebarrier has been around longer, but it's also because go:nowritebarrierrec couldn't be used in situations that had no-op write barriers or where some nested scope did allow write barriers. go:notinheap eliminates many no-op write barriers and go:yeswritebarrierrec makes it possible to opt back in to write barriers, so these two changes will let us use go:nowritebarrierrec far more liberally. This updates #13386, which is about controlling pointers from non-GC'd memory to GC'd memory. That would require some additional pragma (or pragmas), but could build on this pragma. Change-Id: I6314f8f4181535dd166887c9ec239977b54940bd Reviewed-on: https://go-review.googlesource.com/30939 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-10-11 22:53:27 -04:00
yyerror("%v is go:notinheap; heap allocation disallowed", t.Elem())
}
len, cap := l, r
fnname := "makeslice64"
argtype := types.Types[TINT64]
// Type checking guarantees that TIDEAL len/cap are positive and fit in an int.
// The case of len or cap overflow when converting TUINT or TUINTPTR to TINT
// will be handled by the negative range checks in makeslice during runtime.
if (len.Type.IsKind(TIDEAL) || maxintval[len.Type.Etype].Cmp(maxintval[TUINT]) <= 0) &&
(cap.Type.IsKind(TIDEAL) || maxintval[cap.Type.Etype].Cmp(maxintval[TUINT]) <= 0) {
fnname = "makeslice"
argtype = types.Types[TINT]
}
fn := syslook(fnname)
fn = substArgTypes(fn, t.Elem()) // any-1
n = mkcall1(fn, t, init, typename(t.Elem()), conv(len, argtype), conv(cap, argtype))
}
case ORUNESTR:
a := nodnil()
if n.Esc == EscNone {
t := types.NewArray(types.Types[TUINT8], 4)
var_ := temp(t)
a = nod(OADDR, var_, nil)
}
// intstring(*[4]byte, rune)
n = mkcall("intstring", n.Type, init, a, conv(n.Left, types.Types[TINT64]))
case OARRAYBYTESTR:
a := nodnil()
if n.Esc == EscNone {
// Create temporary buffer for string on stack.
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
a = nod(OADDR, temp(t), nil)
}
// slicebytetostring(*[32]byte, []byte) string;
n = mkcall("slicebytetostring", n.Type, init, a, n.Left)
// slicebytetostringtmp([]byte) string;
case OARRAYBYTESTRTMP:
n.Left = walkexpr(n.Left, init)
if !instrumenting {
// Let the backend handle OARRAYBYTESTRTMP directly
// to avoid a function call to slicebytetostringtmp.
break
}
n = mkcall("slicebytetostringtmp", n.Type, init, n.Left)
// slicerunetostring(*[32]byte, []rune) string;
case OARRAYRUNESTR:
a := nodnil()
if n.Esc == EscNone {
// Create temporary buffer for string on stack.
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
a = nod(OADDR, temp(t), nil)
}
n = mkcall("slicerunetostring", n.Type, init, a, n.Left)
// stringtoslicebyte(*32[byte], string) []byte;
case OSTRARRAYBYTE:
a := nodnil()
if n.Esc == EscNone {
// Create temporary buffer for slice on stack.
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
a = nod(OADDR, temp(t), nil)
}
n = mkcall("stringtoslicebyte", n.Type, init, a, conv(n.Left, types.Types[TSTRING]))
case OSTRARRAYBYTETMP:
// []byte(string) conversion that creates a slice
// referring to the actual string bytes.
// This conversion is handled later by the backend and
// is only for use by internal compiler optimizations
// that know that the slice won't be mutated.
// The only such case today is:
// for i, c := range []byte(string)
n.Left = walkexpr(n.Left, init)
// stringtoslicerune(*[32]rune, string) []rune
case OSTRARRAYRUNE:
a := nodnil()
if n.Esc == EscNone {
// Create temporary buffer for slice on stack.
t := types.NewArray(types.Types[TINT32], tmpstringbufsize)
a = nod(OADDR, temp(t), nil)
}
n = mkcall("stringtoslicerune", n.Type, init, a, n.Left)
// ifaceeq(i1 any-1, i2 any-2) (ret bool);
case OCMPIFACE:
if !eqtype(n.Left.Type, n.Right.Type) {
Fatalf("ifaceeq %v %v %v", n.Op, n.Left.Type, n.Right.Type)
}
var fn *Node
if n.Left.Type.IsEmptyInterface() {
fn = syslook("efaceeq")
} else {
fn = syslook("ifaceeq")
}
n.Right = cheapexpr(n.Right, init)
n.Left = cheapexpr(n.Left, init)
lt := nod(OITAB, n.Left, nil)
rt := nod(OITAB, n.Right, nil)
ld := nod(OIDATA, n.Left, nil)
rd := nod(OIDATA, n.Right, nil)
ld.Type = types.Types[TUNSAFEPTR]
rd.Type = types.Types[TUNSAFEPTR]
ld.SetTypecheck(1)
rd.SetTypecheck(1)
call := mkcall1(fn, n.Type, init, lt, ld, rd)
// Check itable/type before full compare.
// Note: short-circuited because order matters.
// TODO(marvin): Fix Node.EType type union.
var cmp *Node
if Op(n.Etype) == OEQ {
cmp = nod(OANDAND, nod(OEQ, lt, rt), call)
} else {
cmp = nod(OOROR, nod(ONE, lt, rt), nod(ONOT, call, nil))
}
cmp = typecheck(cmp, Erv)
cmp = walkexpr(cmp, init)
cmp.Type = n.Type
n = cmp
case OARRAYLIT, OSLICELIT, OMAPLIT, OSTRUCTLIT, OPTRLIT:
if isStaticCompositeLiteral(n) && !canSSAType(n.Type) {
// n can be directly represented in the read-only data section.
// Make direct reference to the static data. See issue 12841.
vstat := staticname(n.Type)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
vstat.Name.SetReadonly(true)
fixedlit(inInitFunction, initKindStatic, n, vstat, init)
n = vstat
n = typecheck(n, Erv)
break
}
var_ := temp(n.Type)
anylit(n, var_, init)
n = var_
case OSEND:
n1 := n.Right
n1 = assignconv(n1, n.Left.Type.Elem(), "chan send")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n1 = walkexpr(n1, init)
n1 = nod(OADDR, n1, nil)
n = mkcall1(chanfn("chansend1", 2, n.Left.Type), nil, init, n.Left, n1)
case OCLOSURE:
n = walkclosure(n, init)
case OCALLPART:
n = walkpartialcall(n, init)
}
// Expressions that are constant at run time but not
// considered const by the language spec are not turned into
// constants until walk. For example, if n is y%1 == 0, the
// walk of y%1 may have replaced it by 0.
// Check whether n with its updated args is itself now a constant.
t := n.Type
evconst(n)
if n.Type != t {
Fatalf("evconst changed Type: %v had type %v, now %v", n, t, n.Type)
}
if n.Op == OLITERAL {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = typecheck(n, Erv)
// Emit string symbol now to avoid emitting
// any concurrently during the backend.
if s, ok := n.Val().U.(string); ok {
_ = stringsym(s)
}
}
updateHasCall(n)
if Debug['w'] != 0 && n != nil {
Dump("walk", n)
}
lineno = lno
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
// TODO(josharian): combine this with its caller and simplify
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
func reduceSlice(n *Node) *Node {
low, high, max := n.SliceBounds()
if high != nil && high.Op == OLEN && samesafeexpr(n.Left, high.Left) {
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
// Reduce x[i:len(x)] to x[i:].
high = nil
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
}
n.SetSliceBounds(low, high, max)
if (n.Op == OSLICE || n.Op == OSLICESTR) && low == nil && high == nil {
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
// Reduce x[:] to x.
if Debug_slice > 0 {
Warn("slice: omit slice operation")
}
return n.Left
}
return n
}
func ascompatee1(l *Node, r *Node, init *Nodes) *Node {
// convas will turn map assigns into function calls,
// making it impossible for reorder3 to work.
n := nod(OAS, l, r)
if l.Op == OINDEXMAP {
return n
}
return convas(n, init)
}
func ascompatee(op Op, nl, nr []*Node, init *Nodes) []*Node {
// check assign expression list to
// an expression list. called in
// expr-list = expr-list
// ensure order of evaluation for function calls
for i := range nl {
nl[i] = safeexpr(nl[i], init)
}
for i1 := range nr {
nr[i1] = safeexpr(nr[i1], init)
}
var nn []*Node
i := 0
for ; i < len(nl); i++ {
if i >= len(nr) {
break
}
// Do not generate 'x = x' during return. See issue 4014.
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if op == ORETURN && samesafeexpr(nl[i], nr[i]) {
continue
}
nn = append(nn, ascompatee1(nl[i], nr[i], init))
}
// cannot happen: caller checked that lists had same length
if i < len(nl) || i < len(nr) {
var nln, nrn Nodes
nln.Set(nl)
nrn.Set(nr)
Fatalf("error in shape across %+v %v %+v / %d %d [%s]", nln, op, nrn, len(nl), len(nr), Curfn.funcname())
}
return nn
}
// l is an lv and rt is the type of an rv
// return 1 if this implies a function call
// evaluating the lv or a function call
// in the conversion of the types
func fncall(l *Node, rt *types.Type) bool {
if l.HasCall() || l.Op == OINDEXMAP {
return true
}
if eqtype(l.Type, rt) {
return false
}
return true
}
// check assign type list to
// an expression list. called in
// expr-list = func()
func ascompatet(nl Nodes, nr *types.Type) []*Node {
if nl.Len() != nr.NumFields() {
Fatalf("ascompatet: assignment count mismatch: %d = %d", nl.Len(), nr.NumFields())
}
var nn, mm Nodes
for i, l := range nl.Slice() {
if isblank(l) {
continue
}
r := nr.Field(i)
// any lv that causes a fn call must be
// deferred until all the return arguments
// have been pulled from the output arguments
if fncall(l, r.Type) {
tmp := temp(r.Type)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
tmp = typecheck(tmp, Erv)
a := nod(OAS, l, tmp)
a = convas(a, &mm)
mm.Append(a)
l = tmp
}
a := nod(OAS, l, nodarg(r, 0))
a = convas(a, &nn)
updateHasCall(a)
if a.HasCall() {
Dump("ascompatet ucount", a)
Fatalf("ascompatet: too many function calls evaluating parameters")
}
nn.Append(a)
}
return append(nn.Slice(), mm.Slice()...)
}
// nodarg returns a Node for the function argument denoted by t,
// which is either the entire function argument or result struct (t is a struct *types.Type)
// or a specific argument (t is a *types.Field within a struct *types.Type).
//
// If fp is 0, the node is for use by a caller invoking the given
// function, preparing the arguments before the call
// or retrieving the results after the call.
// In this case, the node will correspond to an outgoing argument
// slot like 8(SP).
//
// If fp is 1, the node is for use by the function itself
// (the callee), to retrieve its arguments or write its results.
// In this case the node will be an ONAME with an appropriate
// type and offset.
func nodarg(t interface{}, fp int) *Node {
var n *Node
var funarg types.Funarg
switch t := t.(type) {
default:
Fatalf("bad nodarg %T(%v)", t, t)
case *types.Type:
// Entire argument struct, not just one arg
if !t.IsFuncArgStruct() {
Fatalf("nodarg: bad type %v", t)
}
funarg = t.StructType().Funarg
// Build fake variable name for whole arg struct.
n = newname(lookup(".args"))
n.Type = t
first := t.Field(0)
if first == nil {
Fatalf("nodarg: bad struct")
}
if first.Offset == BADWIDTH {
Fatalf("nodarg: offset not computed for %v", t)
}
n.Xoffset = first.Offset
case *types.Field:
funarg = t.Funarg
if fp == 1 {
// NOTE(rsc): This should be using t.Nname directly,
// except in the case where t.Nname.Sym is the blank symbol and
// so the assignment would be discarded during code generation.
// In that case we need to make a new node, and there is no harm
// in optimization passes to doing so. But otherwise we should
// definitely be using the actual declaration and not a newly built node.
// The extra Fatalf checks here are verifying that this is the case,
// without changing the actual logic (at time of writing, it's getting
// toward time for the Go 1.7 beta).
// At some quieter time (assuming we've never seen these Fatalfs happen)
// we could change this code to use "expect" directly.
expect := asNode(t.Nname)
if expect.isParamHeapCopy() {
expect = expect.Name.Param.Stackcopy
}
for _, n := range Curfn.Func.Dcl {
if (n.Class() == PPARAM || n.Class() == PPARAMOUT) && !t.Sym.IsBlank() && n.Sym == t.Sym {
if n != expect {
Fatalf("nodarg: unexpected node: %v (%p %v) vs %v (%p %v)", n, n, n.Op, asNode(t.Nname), asNode(t.Nname), asNode(t.Nname).Op)
}
return n
}
}
if !expect.Sym.IsBlank() {
Fatalf("nodarg: did not find node in dcl list: %v", expect)
}
}
// Build fake name for individual variable.
// This is safe because if there was a real declared name
// we'd have used it above.
n = newname(lookup("__"))
n.Type = t.Type
if t.Offset == BADWIDTH {
Fatalf("nodarg: offset not computed for %v", t)
}
n.Xoffset = t.Offset
n.Orig = asNode(t.Nname)
}
// Rewrite argument named _ to __,
// or else the assignment to _ will be
// discarded during code generation.
if isblank(n) {
n.Sym = lookup("__")
}
switch fp {
default:
Fatalf("bad fp")
case 0: // preparing arguments for call
n.Op = OINDREGSP
n.Xoffset += Ctxt.FixedFrameSize()
case 1: // reading arguments inside call
n.SetClass(PPARAM)
if funarg == types.FunargResults {
n.SetClass(PPARAMOUT)
}
}
n.SetTypecheck(1)
n.SetAddrtaken(true) // keep optimizers at bay
return n
}
// package all the arguments that match a ... T parameter into a []T.
func mkdotargslice(typ *types.Type, args []*Node, init *Nodes, ddd *Node) *Node {
cmd/internal/gc: improve flow of input params to output params This includes the following information in the per-function summary: outK = paramJ encoded in outK bits for paramJ outK = *paramJ encoded in outK bits for paramJ heap = paramJ EscHeap heap = *paramJ EscContentEscapes Note that (currently) if the address of a parameter is taken and returned, necessarily a heap allocation occurred to contain that reference, and the heap can never refer to stack, therefore the parameter and everything downstream from it escapes to the heap. The per-function summary information now has a tuneable number of bits (2 is probably noticeably better than 1, 3 is likely overkill, but it is now easy to check and the -m debugging output includes information that allows you to figure out if more would be better.) A new test was added to check pointer flow through struct-typed and *struct-typed parameters and returns; some of these are sensitive to the number of summary bits, and ought to yield better results with a more competent escape analysis algorithm. Another new test checks (some) correctness with array parameters, results, and operations. The old analysis inferred a piece of plan9 runtime was non-escaping by counteracting overconservative analysis with buggy analysis; with the bug fixed, the result was too conservative (and it's not easy to fix in this framework) so the source code was tweaked to get the desired result. A test was added against the discovered bug. The escape analysis was further improved splitting the "level" into 3 parts, one tracking the conventional "level" and the other two computing the highest-level-suffix-from-copy, which is used to generally model the cancelling effect of indirection applied to address-of. With the improved escape analysis enabled, it was necessary to modify one of the runtime tests because it now attempts to allocate too much on the (small, fixed-size) G0 (system) stack and this failed the test. Compiling src/std after touching src/runtime/*.go with -m logging turned on shows 420 fewer heap allocation sites (10538 vs 10968). Profiling allocations in src/html/template with for i in {1..5} ; do go tool 6g -memprofile=mastx.${i}.prof -memprofilerate=1 *.go; go tool pprof -alloc_objects -text mastx.${i}.prof ; done showed a 15% reduction in allocations performed by the compiler. Update #3753 Update #4720 Fixes #10466 Change-Id: I0fd97d5f5ac527b45f49e2218d158a6e89951432 Reviewed-on: https://go-review.googlesource.com/8202 Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-26 16:36:15 -04:00
esc := uint16(EscUnknown)
if ddd != nil {
esc = ddd.Esc
}
if len(args) == 0 {
n := nodnil()
n.Type = typ
return n
}
n := nod(OCOMPLIT, nil, typenod(typ))
if ddd != nil && prealloc[ddd] != nil {
prealloc[n] = prealloc[ddd] // temporary to use
}
n.List.Set(args)
n.Esc = esc
n = typecheck(n, Erv)
if n.Type == nil {
Fatalf("mkdotargslice: typecheck failed")
}
n = walkexpr(n, init)
return n
}
// check assign expression list to
// a type list. called in
// return expr-list
// func(expr-list)
func ascompatte(call *Node, isddd bool, lhs *types.Type, rhs []*Node, fp int, init *Nodes) []*Node {
// f(g()) where g has multiple return values
if len(rhs) == 1 && rhs[0].Type.IsFuncArgStruct() {
// optimization - can do block copy
if eqtypenoname(rhs[0].Type, lhs) {
nl := nodarg(lhs, fp)
nr := nod(OCONVNOP, rhs[0], nil)
nr.Type = nl.Type
n := convas(nod(OAS, nl, nr), init)
n.SetTypecheck(1)
return []*Node{n}
}
// conversions involved.
// copy into temporaries.
var tmps []*Node
for _, nr := range rhs[0].Type.FieldSlice() {
tmps = append(tmps, temp(nr.Type))
}
a := nod(OAS2, nil, nil)
a.List.Set(tmps)
a.Rlist.Set(rhs)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
a = walkstmt(a)
init.Append(a)
rhs = tmps
}
// For each parameter (LHS), assign its corresponding argument (RHS).
// If there's a ... parameter (which is only valid as the final
// parameter) and this is not a ... call expression,
// then assign the remaining arguments as a slice.
var nn []*Node
for i, nl := range lhs.FieldSlice() {
var nr *Node
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if nl.Isddd() && !isddd {
nr = mkdotargslice(nl.Type, rhs[i:], init, call.Right)
} else {
nr = rhs[i]
}
a := nod(OAS, nodarg(nl, fp), nr)
a = convas(a, init)
a.SetTypecheck(1)
nn = append(nn, a)
}
return nn
}
// generate code for print
func walkprint(nn *Node, init *Nodes) *Node {
// Hoist all the argument evaluation up before the lock.
walkexprlistcheap(nn.List.Slice(), init)
// For println, add " " between elements and "\n" at the end.
if nn.Op == OPRINTN {
s := nn.List.Slice()
t := make([]*Node, 0, len(s)*2)
for i, n := range s {
if i != 0 {
t = append(t, nodstr(" "))
}
t = append(t, n)
}
t = append(t, nodstr("\n"))
nn.List.Set(t)
}
// Collapse runs of constant strings.
s := nn.List.Slice()
t := make([]*Node, 0, len(s))
for i := 0; i < len(s); {
var strs []string
for i < len(s) && Isconst(s[i], CTSTR) {
strs = append(strs, s[i].Val().U.(string))
i++
}
if len(strs) > 0 {
t = append(t, nodstr(strings.Join(strs, "")))
}
if i < len(s) {
t = append(t, s[i])
i++
}
}
nn.List.Set(t)
calls := []*Node{mkcall("printlock", nil, init)}
for i, n := range nn.List.Slice() {
if n.Op == OLITERAL {
switch n.Val().Ctype() {
case CTRUNE:
n = defaultlit(n, types.Runetype)
case CTINT:
n = defaultlit(n, types.Types[TINT64])
case CTFLT:
n = defaultlit(n, types.Types[TFLOAT64])
}
}
if n.Op != OLITERAL && n.Type != nil && n.Type.Etype == TIDEAL {
n = defaultlit(n, types.Types[TINT64])
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = defaultlit(n, nil)
nn.List.SetIndex(i, n)
if n.Type == nil || n.Type.Etype == TFORW {
continue
}
var on *Node
switch n.Type.Etype {
case TINTER:
if n.Type.IsEmptyInterface() {
on = syslook("printeface")
} else {
on = syslook("printiface")
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
on = substArgTypes(on, n.Type) // any-1
case TPTR32, TPTR64, TCHAN, TMAP, TFUNC, TUNSAFEPTR:
on = syslook("printpointer")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
on = substArgTypes(on, n.Type) // any-1
case TSLICE:
on = syslook("printslice")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
on = substArgTypes(on, n.Type) // any-1
case TUINT, TUINT8, TUINT16, TUINT32, TUINT64, TUINTPTR:
if isRuntimePkg(n.Type.Sym.Pkg) && n.Type.Sym.Name == "hex" {
on = syslook("printhex")
} else {
on = syslook("printuint")
}
case TINT, TINT8, TINT16, TINT32, TINT64:
on = syslook("printint")
case TFLOAT32, TFLOAT64:
on = syslook("printfloat")
case TCOMPLEX64, TCOMPLEX128:
on = syslook("printcomplex")
case TBOOL:
on = syslook("printbool")
case TSTRING:
cs := ""
if Isconst(n, CTSTR) {
cs = n.Val().U.(string)
}
switch cs {
case " ":
on = syslook("printsp")
case "\n":
on = syslook("printnl")
default:
on = syslook("printstring")
}
default:
badtype(OPRINT, n.Type, nil)
continue
}
r := nod(OCALL, on, nil)
if params := on.Type.Params().FieldSlice(); len(params) > 0 {
t := params[0].Type
if !eqtype(t, n.Type) {
n = nod(OCONV, n, nil)
n.Type = t
}
r.List.Append(n)
}
calls = append(calls, r)
}
calls = append(calls, mkcall("printunlock", nil, init))
typecheckslice(calls, Etop)
walkexprlist(calls, init)
r := nod(OEMPTY, nil, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Etop)
r = walkexpr(r, init)
r.Ninit.Set(calls)
return r
}
func callnew(t *types.Type) *Node {
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if t.NotInHeap() {
cmd/compile: add go:notinheap type pragma This adds a //go:notinheap pragma for declarations of types that must not be heap allocated. We ensure these rules by disallowing new(T), make([]T), append([]T), or implicit allocation of T, by disallowing conversions to notinheap types, and by propagating notinheap to any struct or array that contains notinheap elements. The utility of this pragma is that we can eliminate write barriers for writes to pointers to go:notinheap types, since the write barrier is guaranteed to be a no-op. This will let us mark several scheduler and memory allocator structures as go:notinheap, which will let us disallow write barriers in the scheduler and memory allocator much more thoroughly and also eliminate some problematic hybrid write barriers. This also makes go:nowritebarrierrec and go:yeswritebarrierrec much more powerful. Currently we use go:nowritebarrier all over the place, but it's almost never what you actually want: when write barriers are illegal, they're typically illegal for a whole dynamic scope. Partly this is because go:nowritebarrier has been around longer, but it's also because go:nowritebarrierrec couldn't be used in situations that had no-op write barriers or where some nested scope did allow write barriers. go:notinheap eliminates many no-op write barriers and go:yeswritebarrierrec makes it possible to opt back in to write barriers, so these two changes will let us use go:nowritebarrierrec far more liberally. This updates #13386, which is about controlling pointers from non-GC'd memory to GC'd memory. That would require some additional pragma (or pragmas), but could build on this pragma. Change-Id: I6314f8f4181535dd166887c9ec239977b54940bd Reviewed-on: https://go-review.googlesource.com/30939 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-10-11 22:53:27 -04:00
yyerror("%v is go:notinheap; heap allocation disallowed", t)
}
dowidth(t)
fn := syslook("newobject")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = substArgTypes(fn, t)
v := mkcall1(fn, types.NewPtr(t), nil, typename(t))
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
v.SetNonNil(true)
return v
}
func iscallret(n *Node) bool {
n = outervalue(n)
return n.Op == OINDREGSP
}
func isstack(n *Node) bool {
n = outervalue(n)
// If n is *autotmp and autotmp = &foo, replace n with foo.
// We introduce such temps when initializing struct literals.
if n.Op == OIND && n.Left.Op == ONAME && n.Left.IsAutoTmp() {
defn := n.Left.Name.Defn
if defn != nil && defn.Op == OAS && defn.Right.Op == OADDR {
n = defn.Right.Left
}
}
switch n.Op {
case OINDREGSP:
return true
case ONAME:
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch n.Class() {
case PAUTO, PPARAM, PPARAMOUT:
return true
}
}
return false
}
// isReflectHeaderDataField reports whether l is an expression p.Data
// where p has type reflect.SliceHeader or reflect.StringHeader.
func isReflectHeaderDataField(l *Node) bool {
if l.Type != types.Types[TUINTPTR] {
return false
}
var tsym *types.Sym
switch l.Op {
case ODOT:
tsym = l.Left.Type.Sym
case ODOTPTR:
tsym = l.Left.Type.Elem().Sym
default:
return false
}
if tsym == nil || l.Sym.Name != "Data" || tsym.Pkg.Path != "reflect" {
return false
}
return tsym.Name == "SliceHeader" || tsym.Name == "StringHeader"
}
func convas(n *Node, init *Nodes) *Node {
if n.Op != OAS {
Fatalf("convas: not OAS %v", n.Op)
}
n.SetTypecheck(1)
var lt *types.Type
var rt *types.Type
if n.Left == nil || n.Right == nil {
goto out
}
lt = n.Left.Type
rt = n.Right.Type
if lt == nil || rt == nil {
goto out
}
if isblank(n.Left) {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = defaultlit(n.Right, nil)
goto out
}
if !eqtype(lt, rt) {
n.Right = assignconv(n.Right, lt, "assignment")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Right = walkexpr(n.Right, init)
}
dowidth(n.Right.Type)
out:
updateHasCall(n)
return n
}
// from ascompat[te]
// evaluating actual function arguments.
// f(a,b)
// if there is exactly one function expr,
// then it is done first. otherwise must
// make temp variables
func reorder1(all []*Node) []*Node {
if len(all) == 1 {
return all
}
funcCalls := 0
for _, n := range all {
updateHasCall(n)
if n.HasCall() {
funcCalls++
}
}
if funcCalls == 0 {
return all
}
var g []*Node // fncalls assigned to tempnames
var f *Node // last fncall assigned to stack
var r []*Node // non fncalls and tempnames assigned to stack
d := 0
for _, n := range all {
if !n.HasCall() {
r = append(r, n)
continue
}
d++
if d == funcCalls {
f = n
continue
}
// make assignment of fncall to tempname
a := temp(n.Right.Type)
a = nod(OAS, a, n.Right)
g = append(g, a)
// put normal arg assignment on list
// with fncall replaced by tempname
n.Right = a.Left
r = append(r, n)
}
if f != nil {
g = append(g, f)
}
return append(g, r...)
}
// from ascompat[ee]
// a,b = c,d
// simultaneous assignment. there cannot
// be later use of an earlier lvalue.
//
// function calls have been removed.
func reorder3(all []*Node) []*Node {
// If a needed expression may be affected by an
// earlier assignment, make an early copy of that
// expression and use the copy instead.
var early []*Node
var mapinit Nodes
for i, n := range all {
l := n.Left
// Save subexpressions needed on left side.
// Drill through non-dereferences.
for {
if l.Op == ODOT || l.Op == OPAREN {
l = l.Left
continue
}
if l.Op == OINDEX && l.Left.Type.IsArray() {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
l.Right = reorder3save(l.Right, all, i, &early)
l = l.Left
continue
}
break
}
switch l.Op {
default:
Fatalf("reorder3 unexpected lvalue %#v", l.Op)
case ONAME:
break
case OINDEX, OINDEXMAP:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
l.Left = reorder3save(l.Left, all, i, &early)
l.Right = reorder3save(l.Right, all, i, &early)
if l.Op == OINDEXMAP {
all[i] = convas(all[i], &mapinit)
}
case OIND, ODOTPTR:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
l.Left = reorder3save(l.Left, all, i, &early)
}
// Save expression on right side.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
all[i].Right = reorder3save(all[i].Right, all, i, &early)
}
early = append(mapinit.Slice(), early...)
return append(early, all...)
}
// if the evaluation of *np would be affected by the
// assignments in all up to but not including the ith assignment,
// copy into a temporary during *early and
// replace *np with that temp.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of reorder3save MUST be assigned back to n, e.g.
// n.Left = reorder3save(n.Left, all, i, early)
func reorder3save(n *Node, all []*Node, i int, early *[]*Node) *Node {
if !aliased(n, all, i) {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
q := temp(n.Type)
q = nod(OAS, q, n)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
q = typecheck(q, Etop)
*early = append(*early, q)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return q.Left
}
// what's the outer value that a write to n affects?
// outer value means containing struct or array.
func outervalue(n *Node) *Node {
for {
if n.Op == OXDOT {
Fatalf("OXDOT in walk")
}
if n.Op == ODOT || n.Op == OPAREN || n.Op == OCONVNOP {
n = n.Left
continue
}
if n.Op == OINDEX && n.Left.Type != nil && n.Left.Type.IsArray() {
n = n.Left
continue
}
break
}
return n
}
// Is it possible that the computation of n might be
// affected by writes in as up to but not including the ith element?
func aliased(n *Node, all []*Node, i int) bool {
if n == nil {
return false
}
// Treat all fields of a struct as referring to the whole struct.
// We could do better but we would have to keep track of the fields.
for n.Op == ODOT {
n = n.Left
}
// Look for obvious aliasing: a variable being assigned
// during the all list and appearing in n.
// Also record whether there are any writes to main memory.
// Also record whether there are any writes to variables
// whose addresses have been taken.
memwrite := 0
varwrite := 0
for _, an := range all[:i] {
a := outervalue(an.Left)
for a.Op == ODOT {
a = a.Left
}
if a.Op != ONAME {
memwrite = 1
continue
}
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch n.Class() {
default:
varwrite = 1
continue
case PAUTO, PPARAM, PPARAMOUT:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if n.Addrtaken() {
varwrite = 1
continue
}
if vmatch2(a, n) {
// Direct hit.
return true
}
}
}
// The variables being written do not appear in n.
// However, n might refer to computed addresses
// that are being written.
// If no computed addresses are affected by the writes, no aliasing.
if memwrite == 0 && varwrite == 0 {
return false
}
// If n does not refer to computed addresses
// (that is, if n only refers to variables whose addresses
// have not been taken), no aliasing.
if varexpr(n) {
return false
}
// Otherwise, both the writes and n refer to computed memory addresses.
// Assume that they might conflict.
return true
}
// does the evaluation of n only refer to variables
// whose addresses have not been taken?
// (and no other memory)
func varexpr(n *Node) bool {
if n == nil {
return true
}
switch n.Op {
case OLITERAL:
return true
case ONAME:
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch n.Class() {
case PAUTO, PPARAM, PPARAMOUT:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if !n.Addrtaken() {
return true
}
}
return false
case OADD,
OSUB,
OOR,
OXOR,
OMUL,
ODIV,
OMOD,
OLSH,
ORSH,
OAND,
OANDNOT,
OPLUS,
OMINUS,
OCOM,
OPAREN,
OANDAND,
OOROR,
OCONV,
OCONVNOP,
OCONVIFACE,
ODOTTYPE:
return varexpr(n.Left) && varexpr(n.Right)
case ODOT: // but not ODOTPTR
// Should have been handled in aliased.
Fatalf("varexpr unexpected ODOT")
}
// Be conservative.
return false
}
// is the name l mentioned in r?
func vmatch2(l *Node, r *Node) bool {
if r == nil {
return false
}
switch r.Op {
// match each right given left
case ONAME:
return l == r
case OLITERAL:
return false
}
if vmatch2(l, r.Left) {
return true
}
if vmatch2(l, r.Right) {
return true
}
for _, n := range r.List.Slice() {
if vmatch2(l, n) {
return true
}
}
return false
}
// is any name mentioned in l also mentioned in r?
// called by sinit.go
func vmatch1(l *Node, r *Node) bool {
// isolate all left sides
if l == nil || r == nil {
return false
}
switch l.Op {
case ONAME:
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
switch l.Class() {
case PPARAM, PAUTO:
break
default:
// assignment to non-stack variable must be
// delayed if right has function calls.
if r.HasCall() {
return true
}
}
return vmatch2(l, r)
case OLITERAL:
return false
}
if vmatch1(l.Left, r) {
return true
}
if vmatch1(l.Right, r) {
return true
}
for _, n := range l.List.Slice() {
if vmatch1(n, r) {
return true
}
}
return false
}
// paramstoheap returns code to allocate memory for heap-escaped parameters
// and to copy non-result parameters' values from the stack.
func paramstoheap(params *types.Type) []*Node {
var nn []*Node
for _, t := range params.Fields().Slice() {
v := asNode(t.Nname)
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if v != nil && v.Sym != nil && strings.HasPrefix(v.Sym.Name, "~r") { // unnamed result
v = nil
}
if v == nil {
continue
}
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if stackcopy := v.Name.Param.Stackcopy; stackcopy != nil {
nn = append(nn, walkstmt(nod(ODCL, v, nil)))
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if stackcopy.Class() == PPARAM {
nn = append(nn, walkstmt(typecheck(nod(OAS, v, stackcopy), Etop)))
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
}
}
}
return nn
}
// zeroResults zeros the return values at the start of the function.
// We need to do this very early in the function. Defer might stop a
// panic and show the return values as they exist at the time of
// panic. For precise stacks, the garbage collector assumes results
// are always live, so we need to zero them before any allocations,
// even allocations to move params/results to the heap.
// The generated code is added to Curfn's Enter list.
func zeroResults() {
lno := lineno
lineno = Curfn.Pos
for _, f := range Curfn.Type.Results().Fields().Slice() {
if v := asNode(f.Nname); v != nil && v.Name.Param.Heapaddr != nil {
// The local which points to the return value is the
// thing that needs zeroing. This is already handled
// by a Needzero annotation in plive.go:livenessepilogue.
continue
}
// Zero the stack location containing f.
Curfn.Func.Enter.Append(nod(OAS, nodarg(f, 1), nil))
}
lineno = lno
}
// returnsfromheap returns code to copy values for heap-escaped parameters
// back to the stack.
func returnsfromheap(params *types.Type) []*Node {
var nn []*Node
for _, t := range params.Fields().Slice() {
v := asNode(t.Nname)
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
if v == nil {
continue
}
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
if stackcopy := v.Name.Param.Stackcopy; stackcopy != nil && stackcopy.Class() == PPARAMOUT {
nn = append(nn, walkstmt(typecheck(nod(OAS, stackcopy, v), Etop)))
cmd/compile: fix liveness computation for heap-escaped parameters The liveness computation of parameters generally was never correct, but forcing all parameters to be live throughout the function covered up that problem. The new SSA back end is too clever: even though it currently keeps the parameter values live throughout the function, it may find optimizations that mean the current values are not written back to the original parameter stack slots immediately or ever (for example if a parameter is set to nil, SSA constant propagation may replace all later uses of the parameter with a constant nil, eliminating the need to write the nil value back to the stack slot), so the liveness code must now track the actual operations on the stack slots, exposing these problems. One small problem in the handling of arguments is that nodarg can return ONAME PPARAM nodes with adjusted offsets, so that there are actually multiple *Node pointers for the same parameter in the instruction stream. This might be possible to correct, but not in this CL. For now, we fix this by using n.Orig instead of n when considering PPARAM and PPARAMOUT nodes. The major problem in the handling of arguments is general confusion in the liveness code about the meaning of PPARAM|PHEAP and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP. The difference between these two is that when a local variable "moves" to the heap, it's really just allocated there to start with; in contrast, when an argument moves to the heap, the actual data has to be copied there from the stack at the beginning of the function, and when a result "moves" to the heap the value in the heap has to be copied back to the stack when the function returns This general confusion is also present in the SSA back end. The PHEAP bit worked decently when I first introduced it 7 years ago (!) in 391425ae. The back end did nothing sophisticated, and in particular there was no analysis at all: no escape analysis, no liveness analysis, and certainly no SSA back end. But the complications caused in the various downstream consumers suggest that this should be a detail kept mainly in the front end. This CL therefore eliminates both the PHEAP bit and even the idea of "heap variables" from the back ends. First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP variable classes with the single PAUTOHEAP, a pseudo-class indicating a variable maintained on the heap and available by indirecting a local variable kept on the stack (a plain PAUTO). Second, walkexpr replaces all references to PAUTOHEAP variables with indirections of the corresponding PAUTO variable. The back ends and the liveness code now just see plain indirected variables. This may actually produce better code, but the real goal here is to eliminate these little-used and somewhat suspect code paths in the back end analyses. The OPARAM node type goes away too. A followup CL will do the same to PPARAMREF. I'm not sure that the back ends (SSA in particular) are handling those right either, and with the framework established in this CL that change is trivial and the result clearly more correct. Fixes #15747. Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74 Reviewed-on: https://go-review.googlesource.com/23393 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
}
}
return nn
}
// heapmoves generates code to handle migrating heap-escaped parameters
// between the stack and the heap. The generated code is added to Curfn's
// Enter and Exit lists.
func heapmoves() {
lno := lineno
lineno = Curfn.Pos
nn := paramstoheap(Curfn.Type.Recvs())
nn = append(nn, paramstoheap(Curfn.Type.Params())...)
nn = append(nn, paramstoheap(Curfn.Type.Results())...)
Curfn.Func.Enter.Append(nn...)
lineno = Curfn.Func.Endlineno
Curfn.Func.Exit.Append(returnsfromheap(Curfn.Type.Results())...)
lineno = lno
}
func vmkcall(fn *Node, t *types.Type, init *Nodes, va []*Node) *Node {
if fn.Type == nil || fn.Type.Etype != TFUNC {
Fatalf("mkcall %v %v", fn, fn.Type)
}
n := fn.Type.NumParams()
if n != len(va) {
Fatalf("vmkcall %v needs %v args got %v", fn, n, len(va))
}
r := nod(OCALL, fn, nil)
r.List.Set(va)
if fn.Type.NumResults() > 0 {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv|Efnstruct)
} else {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Etop)
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = walkexpr(r, init)
r.Type = t
return r
}
func mkcall(name string, t *types.Type, init *Nodes, args ...*Node) *Node {
return vmkcall(syslook(name), t, init, args)
}
func mkcall1(fn *Node, t *types.Type, init *Nodes, args ...*Node) *Node {
return vmkcall(fn, t, init, args)
}
func conv(n *Node, t *types.Type) *Node {
if eqtype(n.Type, t) {
return n
}
n = nod(OCONV, n, nil)
n.Type = t
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = typecheck(n, Erv)
return n
}
// byteindex converts n, which is byte-sized, to a uint8.
// We cannot use conv, because we allow converting bool to uint8 here,
// which is forbidden in user code.
func byteindex(n *Node) *Node {
if eqtype(n.Type, types.Types[TUINT8]) {
return n
}
n = nod(OCONV, n, nil)
n.Type = types.Types[TUINT8]
n.SetTypecheck(1)
return n
}
func chanfn(name string, n int, t *types.Type) *Node {
if !t.IsChan() {
Fatalf("chanfn %v", t)
}
fn := syslook(name)
switch n {
default:
Fatalf("chanfn %d", n)
case 1:
fn = substArgTypes(fn, t.Elem())
case 2:
fn = substArgTypes(fn, t.Elem(), t.Elem())
}
return fn
}
func mapfn(name string, t *types.Type) *Node {
if !t.IsMap() {
Fatalf("mapfn %v", t)
}
fn := syslook(name)
fn = substArgTypes(fn, t.Key(), t.Val(), t.Key(), t.Val())
return fn
}
func mapfndel(name string, t *types.Type) *Node {
if !t.IsMap() {
Fatalf("mapfn %v", t)
}
fn := syslook(name)
fn = substArgTypes(fn, t.Key(), t.Val(), t.Key())
return fn
}
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
const (
mapslow = iota
mapfast32
mapfast64
mapfaststr
nmapfast
)
type mapnames [nmapfast]string
func mkmapnames(base string) mapnames {
return mapnames{base, base + "_fast32", base + "_fast64", base + "_faststr"}
}
var mapaccess1 = mkmapnames("mapaccess1")
var mapaccess2 = mkmapnames("mapaccess2")
var mapassign = mkmapnames("mapassign")
var mapdelete = mkmapnames("mapdelete")
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
func mapfast(t *types.Type) int {
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
// Check ../../runtime/hashmap.go:maxValueSize before changing.
if t.Val().Width > 128 {
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
return mapslow
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
}
switch algtype(t.Key()) {
case AMEM32:
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
return mapfast32
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
case AMEM64:
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
return mapfast64
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
case ASTRING:
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
return mapfaststr
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
}
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
return mapslow
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
}
func writebarrierfn(name string, l *types.Type, r *types.Type) *Node {
fn := syslook(name)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = substArgTypes(fn, l, r)
return fn
}
func addstr(n *Node, init *Nodes) *Node {
// orderexpr rewrote OADDSTR to have a list of strings.
c := n.List.Len()
if c < 2 {
Fatalf("addstr count %d too small", c)
}
buf := nodnil()
if n.Esc == EscNone {
sz := int64(0)
for _, n1 := range n.List.Slice() {
if n1.Op == OLITERAL {
sz += int64(len(n1.Val().U.(string)))
}
}
// Don't allocate the buffer if the result won't fit.
if sz < tmpstringbufsize {
// Create temporary buffer for result string on stack.
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
buf = nod(OADDR, temp(t), nil)
}
}
// build list of string arguments
args := []*Node{buf}
for _, n2 := range n.List.Slice() {
args = append(args, conv(n2, types.Types[TSTRING]))
}
var fn string
if c <= 5 {
// small numbers of strings use direct runtime helpers.
// note: orderexpr knows this cutoff too.
fn = fmt.Sprintf("concatstring%d", c)
} else {
// large numbers of strings are passed to the runtime as a slice.
fn = "concatstrings"
t := types.NewSlice(types.Types[TSTRING])
slice := nod(OCOMPLIT, nil, typenod(t))
if prealloc[n] != nil {
prealloc[slice] = prealloc[n]
}
slice.List.Set(args[1:]) // skip buf arg
args = []*Node{buf, slice}
slice.Esc = EscNone
}
cat := syslook(fn)
r := nod(OCALL, cat, nil)
r.List.Set(args)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
r = walkexpr(r, init)
r.Type = n.Type
return r
}
// expand append(l1, l2...) to
// init {
// s := l1
// n := len(s) + len(l2)
// // Compare as uint so growslice can panic on overflow.
// if uint(n) > uint(cap(s)) {
// s = growslice(s, n)
// }
// s = s[:n]
// memmove(&s[len(l1)], &l2[0], len(l2)*sizeof(T))
// }
// s
//
// l2 is allowed to be a string.
func appendslice(n *Node, init *Nodes) *Node {
walkexprlistsafe(n.List.Slice(), init)
// walkexprlistsafe will leave OINDEX (s[n]) alone if both s
// and n are name or literal, but those may index the slice we're
// modifying here. Fix explicitly.
ls := n.List.Slice()
for i1, n1 := range ls {
ls[i1] = cheapexpr(n1, init)
}
l1 := n.List.First()
l2 := n.List.Second()
var l []*Node
// var s []T
s := temp(l1.Type)
l = append(l, nod(OAS, s, l1)) // s = l1
// n := len(s) + len(l2)
nn := temp(types.Types[TINT])
l = append(l, nod(OAS, nn, nod(OADD, nod(OLEN, s, nil), nod(OLEN, l2, nil))))
// if uint(n) > uint(cap(s))
nif := nod(OIF, nil, nil)
nif.Left = nod(OGT, nod(OCONV, nn, nil), nod(OCONV, nod(OCAP, s, nil), nil))
nif.Left.Left.Type = types.Types[TUINT]
nif.Left.Right.Type = types.Types[TUINT]
// instantiate growslice(Type*, []any, int) []any
fn := syslook("growslice")
fn = substArgTypes(fn, s.Type.Elem(), s.Type.Elem())
// s = growslice(T, s, n)
nif.Nbody.Set1(nod(OAS, s, mkcall1(fn, s.Type, &nif.Ninit, typename(s.Type.Elem()), s, nn)))
l = append(l, nif)
// s = s[:n]
nt := nod(OSLICE, s, nil)
nt.SetSliceBounds(nil, nn, nil)
nt.Etype = 1
l = append(l, nod(OAS, s, nt))
if types.Haspointers(l1.Type.Elem()) {
// copy(s[len(l1):], l2)
nptr1 := nod(OSLICE, s, nil)
nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil)
nptr1.Etype = 1
nptr2 := l2
fn := syslook("typedslicecopy")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = substArgTypes(fn, l1.Type, l2.Type)
var ln Nodes
ln.Set(l)
nt := mkcall1(fn, types.Types[TINT], &ln, typename(l1.Type.Elem()), nptr1, nptr2)
l = append(ln.Slice(), nt)
} else if instrumenting && !compiling_runtime {
// rely on runtime to instrument copy.
// copy(s[len(l1):], l2)
nptr1 := nod(OSLICE, s, nil)
nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil)
nptr1.Etype = 1
nptr2 := l2
var ln Nodes
ln.Set(l)
var nt *Node
if l2.Type.IsString() {
fn := syslook("slicestringcopy")
fn = substArgTypes(fn, l1.Type, l2.Type)
nt = mkcall1(fn, types.Types[TINT], &ln, nptr1, nptr2)
} else {
fn := syslook("slicecopy")
fn = substArgTypes(fn, l1.Type, l2.Type)
nt = mkcall1(fn, types.Types[TINT], &ln, nptr1, nptr2, nodintconst(s.Type.Elem().Width))
}
l = append(ln.Slice(), nt)
} else {
// memmove(&s[len(l1)], &l2[0], len(l2)*sizeof(T))
nptr1 := nod(OINDEX, s, nod(OLEN, l1, nil))
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
nptr1.SetBounded(true)
nptr1 = nod(OADDR, nptr1, nil)
nptr2 := nod(OSPTR, l2, nil)
fn := syslook("memmove")
fn = substArgTypes(fn, s.Type.Elem(), s.Type.Elem())
var ln Nodes
ln.Set(l)
nwid := cheapexpr(conv(nod(OLEN, l2, nil), types.Types[TUINTPTR]), &ln)
nwid = nod(OMUL, nwid, nodintconst(s.Type.Elem().Width))
nt := mkcall1(fn, nil, &ln, nptr1, nptr2, nwid)
l = append(ln.Slice(), nt)
}
typecheckslice(l, Etop)
walkstmtlist(l)
init.Append(l...)
return s
}
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
// Rewrite append(src, x, y, z) so that any side effects in
// x, y, z (including runtime panics) are evaluated in
// initialization statements before the append.
// For normal code generation, stop there and leave the
// rest to cgen_append.
//
// For race detector, expand append(src, a [, b]* ) to
//
// init {
// s := src
// const argc = len(args) - 1
// if cap(s) - len(s) < argc {
// s = growslice(s, len(s)+argc)
// }
// n := len(s)
// s = s[:n+argc]
// s[n] = a
// s[n+1] = b
// ...
// }
// s
func walkappend(n *Node, init *Nodes, dst *Node) *Node {
if !samesafeexpr(dst, n.List.First()) {
n.List.SetFirst(safeexpr(n.List.First(), init))
n.List.SetFirst(walkexpr(n.List.First(), init))
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
}
walkexprlistsafe(n.List.Slice()[1:], init)
// walkexprlistsafe will leave OINDEX (s[n]) alone if both s
// and n are name or literal, but those may index the slice we're
// modifying here. Fix explicitly.
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
// Using cheapexpr also makes sure that the evaluation
// of all arguments (and especially any panics) happen
// before we begin to modify the slice in a visible way.
ls := n.List.Slice()[1:]
for i, n := range ls {
ls[i] = cheapexpr(n, init)
}
nsrc := n.List.First()
argc := n.List.Len() - 1
if argc < 1 {
return nsrc
}
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
// General case, with no function calls left as arguments.
// Leave for gen, except that instrumentation requires old form.
if !instrumenting || compiling_runtime {
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
return n
}
var l []*Node
ns := temp(nsrc.Type)
l = append(l, nod(OAS, ns, nsrc)) // s = src
na := nodintconst(int64(argc)) // const argc
nx := nod(OIF, nil, nil) // if cap(s) - len(s) < argc
nx.Left = nod(OLT, nod(OSUB, nod(OCAP, ns, nil), nod(OLEN, ns, nil)), na)
fn := syslook("growslice") // growslice(<type>, old []T, mincap int) (ret []T)
fn = substArgTypes(fn, ns.Type.Elem(), ns.Type.Elem())
nx.Nbody.Set1(nod(OAS, ns,
mkcall1(fn, ns.Type, &nx.Ninit, typename(ns.Type.Elem()), ns,
nod(OADD, nod(OLEN, ns, nil), na))))
l = append(l, nx)
nn := temp(types.Types[TINT])
l = append(l, nod(OAS, nn, nod(OLEN, ns, nil))) // n = len(s)
nx = nod(OSLICE, ns, nil) // ...s[:n+argc]
nx.SetSliceBounds(nil, nod(OADD, nn, na), nil)
nx.Etype = 1
l = append(l, nod(OAS, ns, nx)) // s = s[:n+argc]
ls = n.List.Slice()[1:]
for i, n := range ls {
nx = nod(OINDEX, ns, nn) // s[n] ...
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
nx.SetBounded(true)
l = append(l, nod(OAS, nx, n)) // s[n] = arg
if i+1 < len(ls) {
l = append(l, nod(OAS, nn, nod(OADD, nn, nodintconst(1)))) // n = n + 1
}
}
typecheckslice(l, Etop)
walkstmtlist(l)
init.Append(l...)
return ns
}
// Lower copy(a, b) to a memmove call or a runtime call.
//
// init {
// n := len(a)
// if n > len(b) { n = len(b) }
// memmove(a.ptr, b.ptr, n*sizeof(elem(a)))
// }
// n;
//
// Also works if b is a string.
//
func copyany(n *Node, init *Nodes, runtimecall bool) *Node {
if types.Haspointers(n.Left.Type.Elem()) {
fn := writebarrierfn("typedslicecopy", n.Left.Type, n.Right.Type)
return mkcall1(fn, n.Type, init, typename(n.Left.Type.Elem()), n.Left, n.Right)
}
if runtimecall {
if n.Right.Type.IsString() {
fn := syslook("slicestringcopy")
fn = substArgTypes(fn, n.Left.Type, n.Right.Type)
return mkcall1(fn, n.Type, init, n.Left, n.Right)
}
fn := syslook("slicecopy")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = substArgTypes(fn, n.Left.Type, n.Right.Type)
return mkcall1(fn, n.Type, init, n.Left, n.Right, nodintconst(n.Left.Type.Elem().Width))
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
nl := temp(n.Left.Type)
nr := temp(n.Right.Type)
var l []*Node
l = append(l, nod(OAS, nl, n.Left))
l = append(l, nod(OAS, nr, n.Right))
nfrm := nod(OSPTR, nr, nil)
nto := nod(OSPTR, nl, nil)
nlen := temp(types.Types[TINT])
// n = len(to)
l = append(l, nod(OAS, nlen, nod(OLEN, nl, nil)))
// if n > len(frm) { n = len(frm) }
nif := nod(OIF, nil, nil)
nif.Left = nod(OGT, nlen, nod(OLEN, nr, nil))
nif.Nbody.Append(nod(OAS, nlen, nod(OLEN, nr, nil)))
l = append(l, nif)
// Call memmove.
fn := syslook("memmove")
fn = substArgTypes(fn, nl.Type.Elem(), nl.Type.Elem())
nwid := temp(types.Types[TUINTPTR])
l = append(l, nod(OAS, nwid, conv(nlen, types.Types[TUINTPTR])))
nwid = nod(OMUL, nwid, nodintconst(nl.Type.Elem().Width))
l = append(l, mkcall1(fn, nil, init, nto, nfrm, nwid))
typecheckslice(l, Etop)
walkstmtlist(l)
init.Append(l...)
return nlen
}
func eqfor(t *types.Type, needsize *int) *Node {
// Should only arrive here with large memory or
// a struct/array containing a non-memory field/element.
// Small memory is handled inline, and single non-memory
// is handled during type check (OCMPSTR etc).
switch a, _ := algtype1(t); a {
case AMEM:
n := syslook("memequal")
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = substArgTypes(n, t, t)
*needsize = 1
return n
case ASPECIAL:
sym := typesymprefix(".eq", t)
n := newname(sym)
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
n.SetClass(PFUNC)
ntype := nod(OTFUNC, nil, nil)
ntype.List.Append(anonfield(types.NewPtr(t)))
ntype.List.Append(anonfield(types.NewPtr(t)))
ntype.Rlist.Append(anonfield(types.Types[TBOOL]))
ntype = typecheck(ntype, Etype)
n.Type = ntype.Type
*needsize = 0
return n
}
Fatalf("eqfor %v", t)
return nil
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of walkcompare MUST be assigned back to n, e.g.
// n.Left = walkcompare(n.Left, init)
func walkcompare(n *Node, init *Nodes) *Node {
// Given interface value l and concrete value r, rewrite
// l == r
// into types-equal && data-equal.
// This is efficient, avoids allocations, and avoids runtime calls.
var l, r *Node
if n.Left.Type.IsInterface() && !n.Right.Type.IsInterface() {
l = n.Left
r = n.Right
} else if !n.Left.Type.IsInterface() && n.Right.Type.IsInterface() {
l = n.Right
r = n.Left
}
if l != nil {
// Handle both == and !=.
eq := n.Op
var andor Op
if eq == OEQ {
andor = OANDAND
} else {
andor = OOROR
}
// Check for types equal.
// For empty interface, this is:
// l.tab == type(r)
// For non-empty interface, this is:
// l.tab != nil && l.tab._type == type(r)
var eqtype *Node
tab := nod(OITAB, l, nil)
rtyp := typename(r.Type)
if l.Type.IsEmptyInterface() {
tab.Type = types.NewPtr(types.Types[TUINT8])
tab.SetTypecheck(1)
eqtype = nod(eq, tab, rtyp)
} else {
nonnil := nod(brcom(eq), nodnil(), tab)
match := nod(eq, itabType(tab), rtyp)
eqtype = nod(andor, nonnil, match)
}
// Check for data equal.
eqdata := nod(eq, ifaceData(l, r.Type), r)
// Put it all together.
expr := nod(andor, eqtype, eqdata)
n = finishcompare(n, expr, init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
// Must be comparison of array or struct.
// Otherwise back end handles it.
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
// While we're here, decide whether to
// inline or call an eq alg.
t := n.Left.Type
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
var inline bool
maxcmpsize := int64(4)
unalignedLoad := false
switch thearch.LinkArch.Family {
case sys.AMD64, sys.ARM64, sys.S390X:
// Keep this low enough, to generate less code than function call.
maxcmpsize = 16
unalignedLoad = true
case sys.I386:
maxcmpsize = 8
unalignedLoad = true
}
switch t.Etype {
default:
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
case TARRAY:
// We can compare several elements at once with 2/4/8 byte integer compares
inline = t.NumElem() <= 1 || (issimple[t.Elem().Etype] && (t.NumElem() <= 4 || t.Elem().Width*t.NumElem() <= maxcmpsize))
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
case TSTRUCT:
inline = t.NumFields() <= 4
}
cmpl := n.Left
for cmpl != nil && cmpl.Op == OCONVNOP {
cmpl = cmpl.Left
}
cmpr := n.Right
for cmpr != nil && cmpr.Op == OCONVNOP {
cmpr = cmpr.Left
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
// Chose not to inline. Call equality function directly.
if !inline {
if isvaluelit(cmpl) {
var_ := temp(cmpl.Type)
anylit(cmpl, var_, init)
cmpl = var_
}
if isvaluelit(cmpr) {
var_ := temp(cmpr.Type)
anylit(cmpr, var_, init)
cmpr = var_
}
if !islvalue(cmpl) || !islvalue(cmpr) {
Fatalf("arguments of comparison must be lvalues - %v %v", cmpl, cmpr)
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
// eq algs take pointers
pl := temp(types.NewPtr(t))
al := nod(OAS, pl, nod(OADDR, cmpl, nil))
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
al.Right.Etype = 1 // addr does not escape
al = typecheck(al, Etop)
init.Append(al)
pr := temp(types.NewPtr(t))
ar := nod(OAS, pr, nod(OADDR, cmpr, nil))
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
ar.Right.Etype = 1 // addr does not escape
ar = typecheck(ar, Etop)
init.Append(ar)
var needsize int
call := nod(OCALL, eqfor(t, &needsize), nil)
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
call.List.Append(pl)
call.List.Append(pr)
if needsize != 0 {
call.List.Append(nodintconst(t.Width))
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
}
res := call
if n.Op != OEQ {
res = nod(ONOT, res, nil)
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
}
n = finishcompare(n, res, init)
return n
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
// inline: build boolean expression comparing element by element
andor := OANDAND
if n.Op == ONE {
andor = OOROR
}
var expr *Node
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
compare := func(el, er *Node) {
a := nod(n.Op, el, er)
if expr == nil {
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
expr = a
} else {
expr = nod(andor, expr, a)
}
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
cmpl = safeexpr(cmpl, init)
cmpr = safeexpr(cmpr, init)
if t.IsStruct() {
for _, f := range t.Fields().Slice() {
sym := f.Sym
if sym.IsBlank() {
continue
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
compare(
nodSym(OXDOT, cmpl, sym),
nodSym(OXDOT, cmpr, sym),
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
)
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
} else {
step := int64(1)
remains := t.NumElem() * t.Elem().Width
combine64bit := unalignedLoad && Widthreg == 8 && t.Elem().Width <= 4 && t.Elem().IsInteger()
combine32bit := unalignedLoad && t.Elem().Width <= 2 && t.Elem().IsInteger()
combine16bit := unalignedLoad && t.Elem().Width == 1 && t.Elem().IsInteger()
for i := int64(0); remains > 0; {
var convType *types.Type
switch {
case remains >= 8 && combine64bit:
convType = types.Types[TINT64]
step = 8 / t.Elem().Width
case remains >= 4 && combine32bit:
convType = types.Types[TUINT32]
step = 4 / t.Elem().Width
case remains >= 2 && combine16bit:
convType = types.Types[TUINT16]
step = 2 / t.Elem().Width
default:
step = 1
}
if step == 1 {
compare(
nod(OINDEX, cmpl, nodintconst(int64(i))),
nod(OINDEX, cmpr, nodintconst(int64(i))),
)
i++
remains -= t.Elem().Width
} else {
cmplw := nod(OINDEX, cmpl, nodintconst(int64(i)))
cmplw = conv(cmplw, convType)
cmprw := nod(OINDEX, cmpr, nodintconst(int64(i)))
cmprw = conv(cmprw, convType)
// For code like this: uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 ...
// ssa will generate a single large load.
for offset := int64(1); offset < step; offset++ {
lb := nod(OINDEX, cmpl, nodintconst(int64(i+offset)))
lb = conv(lb, convType)
lb = nod(OLSH, lb, nodintconst(int64(8*t.Elem().Width*offset)))
cmplw = nod(OOR, cmplw, lb)
rb := nod(OINDEX, cmpr, nodintconst(int64(i+offset)))
rb = conv(rb, convType)
rb = nod(OLSH, rb, nodintconst(int64(8*t.Elem().Width*offset)))
cmprw = nod(OOR, cmprw, rb)
}
compare(cmplw, cmprw)
i += step
remains -= step * t.Elem().Width
}
}
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
if expr == nil {
expr = nodbool(n.Op == OEQ)
}
cmd/compile: when inlining ==, don’t take the address of the values This CL reworks walkcompare for clarity and concision. It also makes one significant functional change. (The functional change is hard to separate cleanly from the cleanup, so I just did them together.) When inlining and unrolling an equality comparison for a small struct or array, compare the elements like: a[0] == b[0] && a[1] == b[1] rather than pa := &a pb := &b pa[0] == pb[0] && pa[1] == pb[1] The result is the same, but taking the address and working through the indirect forces the backends to generate less efficient code. This is only an improvement with the SSA backend. However, every port but s390x now has a working SSA backend, and switching to the SSA backend by default everywhere is a priority for Go 1.8. It thus seems reasonable to start to prioritize SSA performance over the old backend. Updates #15303 Sample code: type T struct { a, b int8 } func g(a T) bool { return a == T{1, 2} } SSA before: "".g t=1 size=80 args=0x10 locals=0x8 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16 0x0000 00000 (badeq.go:7) SUBQ $8, SP 0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX 0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP) 0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX 0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP) 0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP) 0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP) 0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP) 0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX 0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX 0x002f 00047 (badeq.go:8) CMPB AL, CL 0x0031 00049 (badeq.go:8) JNE 70 0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX 0x0038 00056 (badeq.go:8) CMPB AL, $2 0x003a 00058 (badeq.go:8) SETEQ AL 0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP) 0x0041 00065 (badeq.go:8) ADDQ $8, SP 0x0045 00069 (badeq.go:8) RET 0x0046 00070 (badeq.go:8) MOVB $0, AL 0x0048 00072 (badeq.go:8) JMP 61 SSA after: "".g t=1 size=32 args=0x10 locals=0x0 0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) NOP 0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB) 0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX 0x0005 00005 (badeq.go:8) CMPB AL, $1 0x0007 00007 (badeq.go:8) JNE 25 0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX 0x000e 00014 (badeq.go:8) CMPB CL, $2 0x0011 00017 (badeq.go:8) SETEQ AL 0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP) 0x0018 00024 (badeq.go:8) RET 0x0019 00025 (badeq.go:8) MOVB $0, AL 0x001b 00027 (badeq.go:8) JMP 20 Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86 Reviewed-on: https://go-review.googlesource.com/22277 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
n = finishcompare(n, expr, init)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
// The result of finishcompare MUST be assigned back to n, e.g.
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// n.Left = finishcompare(n.Left, x, r, init)
func finishcompare(n, r *Node, init *Nodes) *Node {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// Use nn here to avoid passing r to typecheck.
nn := r
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
nn = typecheck(nn, Erv)
nn = walkexpr(nn, init)
r = nn
if r.Type != n.Type {
r = nod(OCONVNOP, r, nil)
r.Type = n.Type
r.SetTypecheck(1)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
nn = r
}
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return nn
}
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
// isIntOrdering reports whether n is a <, ≤, >, or ≥ ordering between integers.
func (n *Node) isIntOrdering() bool {
switch n.Op {
case OLE, OLT, OGE, OGT:
default:
return false
}
return n.Left.Type.IsInteger() && n.Right.Type.IsInteger()
}
// walkinrange optimizes integer-in-range checks, such as 4 <= x && x < 10.
// n must be an OANDAND or OOROR node.
// The result of walkinrange MUST be assigned back to n, e.g.
// n.Left = walkinrange(n.Left)
func walkinrange(n *Node, init *Nodes) *Node {
// We are looking for something equivalent to a opl b OP b opr c, where:
// * a, b, and c have integer type
// * b is side-effect-free
// * opl and opr are each < or ≤
// * OP is &&
l := n.Left
r := n.Right
if !l.isIntOrdering() || !r.isIntOrdering() {
return n
}
// Find b, if it exists, and rename appropriately.
// Input is: l.Left l.Op l.Right ANDAND/OROR r.Left r.Op r.Right
// Output is: a opl b(==x) ANDAND/OROR b(==x) opr c
a, opl, b := l.Left, l.Op, l.Right
x, opr, c := r.Left, r.Op, r.Right
for i := 0; ; i++ {
if samesafeexpr(b, x) {
break
}
if i == 3 {
// Tried all permutations and couldn't find an appropriate b == x.
return n
}
if i&1 == 0 {
a, opl, b = b, brrev(opl), a
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
} else {
x, opr, c = c, brrev(opr), x
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
}
}
// If n.Op is ||, apply de Morgan.
// Negate the internal ops now; we'll negate the top level op at the end.
// Henceforth assume &&.
negateResult := n.Op == OOROR
if negateResult {
opl = brcom(opl)
opr = brcom(opr)
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
}
cmpdir := func(o Op) int {
switch o {
case OLE, OLT:
return -1
case OGE, OGT:
return +1
}
Fatalf("walkinrange cmpdir %v", o)
return 0
}
if cmpdir(opl) != cmpdir(opr) {
// Not a range check; something like b < a && b < c.
return n
}
switch opl {
case OGE, OGT:
// We have something like a > b && b ≥ c.
// Switch and reverse ops and rename constants,
// to make it look like a ≤ b && b < c.
a, c = c, a
opl, opr = brrev(opr), brrev(opl)
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
}
// We must ensure that c-a is non-negative.
// For now, require a and c to be constants.
// In the future, we could also support a == 0 and c == len/cap(...).
// Unfortunately, by this point, most len/cap expressions have been
// stored into temporary variables.
if !Isconst(a, CTINT) || !Isconst(c, CTINT) {
return n
}
if opl == OLT {
// We have a < b && ...
// We need a ≤ b && ... to safely use unsigned comparison tricks.
// If a is not the maximum constant for b's type,
// we can increment a and switch to ≤.
if a.Int64() >= maxintval[b.Type.Etype].Int64() {
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
return n
}
a = nodintconst(a.Int64() + 1)
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
opl = OLE
}
bound := c.Int64() - a.Int64()
if bound < 0 {
// Bad news. Something like 5 <= x && x < 3.
// Rare in practice, and we still need to generate side-effects,
// so just leave it alone.
return n
}
// We have a ≤ b && b < c (or a ≤ b && b ≤ c).
// This is equivalent to (a-a) ≤ (b-a) && (b-a) < (c-a),
// which is equivalent to 0 ≤ (b-a) && (b-a) < (c-a),
// which is equivalent to uint(b-a) < uint(c-a).
ut := b.Type.ToUnsigned()
lhs := conv(nod(OSUB, b, a), ut)
rhs := nodintconst(bound)
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
if negateResult {
// Negate top level.
opr = brcom(opr)
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
}
cmp := nod(opr, lhs, rhs)
cmp.Pos = n.Pos
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
cmp = addinit(cmp, l.Ninit.Slice())
cmp = addinit(cmp, r.Ninit.Slice())
// Typecheck the AST rooted at cmp...
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
cmp = typecheck(cmp, Erv)
// ...but then reset cmp's type to match n's type.
cmp.Type = n.Type
cmd/compile: optimize integer "in range" expressions Use unsigned comparisons to reduce from two comparisons to one for integer "in range" checks, such as a <= b && b < c. We already do this for bounds checks. Extend it to user code. This is much easier to do in the front end than SSA. A back end optimization would be more powerful, but this is a good start. This reduces the power of some of SSA prove inferences (#16653), but those regressions appear to be rare and not worth holding this CL for. Fixes #15844. Fixes #16697. strconv benchmarks: name old time/op new time/op delta Atof64Decimal-8 41.4ns ± 3% 38.9ns ± 2% -5.89% (p=0.000 n=24+25) Atof64Float-8 48.5ns ± 0% 46.8ns ± 3% -3.64% (p=0.000 n=20+23) Atof64FloatExp-8 97.7ns ± 4% 93.5ns ± 1% -4.25% (p=0.000 n=25+20) Atof64Big-8 187ns ± 8% 162ns ± 2% -13.54% (p=0.000 n=24+22) Atof64RandomBits-8 250ns ± 6% 233ns ± 5% -6.76% (p=0.000 n=25+25) Atof64RandomFloats-8 160ns ± 0% 152ns ± 0% -5.00% (p=0.000 n=21+22) Atof32Decimal-8 41.1ns ± 1% 38.7ns ± 2% -5.86% (p=0.000 n=24+24) Atof32Float-8 46.1ns ± 1% 43.5ns ± 3% -5.63% (p=0.000 n=21+24) Atof32FloatExp-8 101ns ± 4% 100ns ± 2% -1.59% (p=0.000 n=24+23) Atof32Random-8 136ns ± 3% 133ns ± 3% -2.83% (p=0.000 n=22+22) Atoi-8 33.8ns ± 3% 30.6ns ± 3% -9.51% (p=0.000 n=24+25) AtoiNeg-8 31.6ns ± 3% 29.1ns ± 2% -8.05% (p=0.000 n=23+24) Atoi64-8 48.6ns ± 1% 43.8ns ± 1% -9.81% (p=0.000 n=20+23) Atoi64Neg-8 47.1ns ± 4% 42.0ns ± 2% -10.83% (p=0.000 n=25+25) FormatFloatDecimal-8 177ns ± 9% 178ns ± 6% ~ (p=0.460 n=25+25) FormatFloat-8 282ns ± 6% 282ns ± 3% ~ (p=0.954 n=25+22) FormatFloatExp-8 259ns ± 7% 255ns ± 6% ~ (p=0.089 n=25+24) FormatFloatNegExp-8 253ns ± 6% 254ns ± 6% ~ (p=0.941 n=25+24) FormatFloatBig-8 340ns ± 6% 341ns ± 8% ~ (p=0.600 n=22+25) AppendFloatDecimal-8 79.4ns ± 0% 80.6ns ± 6% ~ (p=0.861 n=20+25) AppendFloat-8 175ns ± 3% 174ns ± 0% ~ (p=0.722 n=25+20) AppendFloatExp-8 142ns ± 4% 142ns ± 2% ~ (p=0.948 n=25+24) AppendFloatNegExp-8 137ns ± 2% 138ns ± 2% +0.70% (p=0.001 n=24+25) AppendFloatBig-8 218ns ± 3% 218ns ± 4% ~ (p=0.596 n=25+25) AppendFloatBinaryExp-8 80.0ns ± 4% 78.0ns ± 1% -2.43% (p=0.000 n=24+21) AppendFloat32Integer-8 82.3ns ± 3% 79.3ns ± 4% -3.69% (p=0.000 n=24+25) AppendFloat32ExactFraction-8 143ns ± 2% 143ns ± 0% ~ (p=0.177 n=23+19) AppendFloat32Point-8 175ns ± 3% 175ns ± 3% ~ (p=0.062 n=24+25) AppendFloat32Exp-8 139ns ± 2% 137ns ± 4% -1.05% (p=0.001 n=24+24) AppendFloat32NegExp-8 134ns ± 0% 137ns ± 4% +2.06% (p=0.000 n=22+25) AppendFloat64Fixed1-8 97.8ns ± 0% 98.6ns ± 3% ~ (p=0.711 n=20+25) AppendFloat64Fixed2-8 110ns ± 3% 110ns ± 5% -0.45% (p=0.037 n=24+24) AppendFloat64Fixed3-8 102ns ± 3% 102ns ± 3% ~ (p=0.684 n=24+24) AppendFloat64Fixed4-8 112ns ± 3% 110ns ± 0% -1.43% (p=0.000 n=25+18) FormatInt-8 3.18µs ± 4% 3.10µs ± 6% -2.54% (p=0.001 n=24+25) AppendInt-8 1.81µs ± 5% 1.80µs ± 5% ~ (p=0.648 n=25+25) FormatUint-8 812ns ± 6% 816ns ± 6% ~ (p=0.777 n=25+25) AppendUint-8 536ns ± 4% 538ns ± 3% ~ (p=0.798 n=20+22) Quote-8 605ns ± 6% 602ns ± 9% ~ (p=0.573 n=25+25) QuoteRune-8 99.5ns ± 8% 100.2ns ± 7% ~ (p=0.432 n=25+25) AppendQuote-8 361ns ± 3% 363ns ± 4% ~ (p=0.085 n=25+25) AppendQuoteRune-8 23.3ns ± 3% 22.4ns ± 2% -3.79% (p=0.000 n=25+24) UnquoteEasy-8 146ns ± 4% 145ns ± 5% ~ (p=0.112 n=24+24) UnquoteHard-8 804ns ± 6% 771ns ± 6% -4.10% (p=0.000 n=25+24) Change-Id: Ibd384e46e90f1cfa40503c8c6352a54c65b72980 Reviewed-on: https://go-review.googlesource.com/27652 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-26 18:08:24 -07:00
cmp = walkexpr(cmp, init)
return cmp
}
// return 1 if integer n must be in range [0, max), 0 otherwise
func bounded(n *Node, max int64) bool {
if n.Type == nil || !n.Type.IsInteger() {
return false
}
sign := n.Type.IsSigned()
bits := int32(8 * n.Type.Width)
if smallintconst(n) {
v := n.Int64()
return 0 <= v && v < max
}
switch n.Op {
case OAND:
v := int64(-1)
if smallintconst(n.Left) {
v = n.Left.Int64()
} else if smallintconst(n.Right) {
v = n.Right.Int64()
}
if 0 <= v && v < max {
return true
}
case OMOD:
if !sign && smallintconst(n.Right) {
v := n.Right.Int64()
if 0 <= v && v <= max {
return true
}
}
case ODIV:
if !sign && smallintconst(n.Right) {
v := n.Right.Int64()
for bits > 0 && v >= 2 {
bits--
v >>= 1
}
}
case ORSH:
if !sign && smallintconst(n.Right) {
v := n.Right.Int64()
if v > int64(bits) {
return true
}
bits -= int32(v)
}
}
if !sign && bits <= 62 && 1<<uint(bits) <= max {
return true
}
return false
}
// usemethod checks interface method calls for uses of reflect.Type.Method.
func usemethod(n *Node) {
t := n.Left.Type
// Looking for either of:
// Method(int) reflect.Method
// MethodByName(string) (reflect.Method, bool)
//
// TODO(crawshaw): improve precision of match by working out
// how to check the method name.
if n := t.NumParams(); n != 1 {
return
}
if n := t.NumResults(); n != 1 && n != 2 {
return
}
p0 := t.Params().Field(0)
res0 := t.Results().Field(0)
var res1 *types.Field
if t.NumResults() == 2 {
res1 = t.Results().Field(1)
}
if res1 == nil {
if p0.Type.Etype != TINT {
return
}
} else {
if !p0.Type.IsString() {
return
}
if !res1.Type.IsBoolean() {
return
}
}
// Note: Don't rely on res0.Type.String() since its formatting depends on multiple factors
// (including global variables such as numImports - was issue #19028).
if s := res0.Type.Sym; s != nil && s.Name == "Method" && s.Pkg != nil && s.Pkg.Path == "reflect" {
Curfn.Func.SetReflectMethod(true)
}
}
func usefield(n *Node) {
if objabi.Fieldtrack_enabled == 0 {
return
}
switch n.Op {
default:
Fatalf("usefield %v", n.Op)
case ODOT, ODOTPTR:
break
}
if n.Sym == nil {
// No field name. This DOTPTR was built by the compiler for access
// to runtime data structures. Ignore.
return
}
t := n.Left.Type
if t.IsPtr() {
t = t.Elem()
}
field := dotField[typeSymKey{t.Orig, n.Sym}]
if field == nil {
Fatalf("usefield %v %v without paramfld", n.Left.Type, n.Sym)
}
if !strings.Contains(field.Note, "go:\"track\"") {
return
}
outer := n.Left.Type
if outer.IsPtr() {
outer = outer.Elem()
}
if outer.Sym == nil {
yyerror("tracked field must be in named struct type")
}
if !exportname(field.Sym.Name) {
yyerror("tracked field must be exported (upper case)")
}
sym := tracksym(outer, field)
if Curfn.Func.FieldTrack == nil {
Curfn.Func.FieldTrack = make(map[*types.Sym]struct{})
}
Curfn.Func.FieldTrack[sym] = struct{}{}
}
func candiscardlist(l Nodes) bool {
for _, n := range l.Slice() {
if !candiscard(n) {
return false
}
}
return true
}
func candiscard(n *Node) bool {
if n == nil {
return true
}
switch n.Op {
default:
return false
// Discardable as long as the subpieces are.
case ONAME,
ONONAME,
OTYPE,
OPACK,
OLITERAL,
OADD,
OSUB,
OOR,
OXOR,
OADDSTR,
OADDR,
OANDAND,
OARRAYBYTESTR,
OARRAYRUNESTR,
OSTRARRAYBYTE,
OSTRARRAYRUNE,
OCAP,
OCMPIFACE,
OCMPSTR,
OCOMPLIT,
OMAPLIT,
OSTRUCTLIT,
OARRAYLIT,
OSLICELIT,
OPTRLIT,
OCONV,
OCONVIFACE,
OCONVNOP,
ODOT,
OEQ,
ONE,
OLT,
OLE,
OGT,
OGE,
OKEY,
cmd/compile: add OSTRUCTKEY for keyed struct literals Previously, we used OKEY nodes to represent keyed struct literal elements. The field names were represented by an ONAME node, but this is clumsy because it's the only remaining case where ONAME was used to represent a bare identifier and not a variable. This CL introduces a new OSTRUCTKEY node op for use in struct literals. These ops instead store the field name in the node's own Sym field. This is similar in spirit to golang.org/cl/20890. Significant reduction in allocations for struct literal heavy code like package unicode: name old time/op new time/op delta Template 345ms ± 6% 341ms ± 6% ~ (p=0.141 n=29+28) Unicode 200ms ± 9% 184ms ± 7% -7.77% (p=0.000 n=29+30) GoTypes 1.04s ± 3% 1.05s ± 3% ~ (p=0.096 n=30+30) Compiler 4.47s ± 9% 4.49s ± 6% ~ (p=0.890 n=29+29) name old user-ns/op new user-ns/op delta Template 523M ±13% 516M ±17% ~ (p=0.400 n=29+30) Unicode 334M ±27% 314M ±30% ~ (p=0.093 n=30+30) GoTypes 1.53G ±10% 1.52G ±10% ~ (p=0.572 n=30+30) Compiler 6.28G ± 7% 6.34G ±11% ~ (p=0.300 n=30+30) name old alloc/op new alloc/op delta Template 44.5MB ± 0% 44.4MB ± 0% -0.35% (p=0.000 n=27+30) Unicode 39.2MB ± 0% 34.5MB ± 0% -11.79% (p=0.000 n=26+30) GoTypes 125MB ± 0% 125MB ± 0% -0.12% (p=0.000 n=29+30) Compiler 515MB ± 0% 515MB ± 0% -0.10% (p=0.000 n=29+30) name old allocs/op new allocs/op delta Template 426k ± 0% 424k ± 0% -0.39% (p=0.000 n=29+30) Unicode 374k ± 0% 323k ± 0% -13.67% (p=0.000 n=29+30) GoTypes 1.21M ± 0% 1.21M ± 0% -0.14% (p=0.000 n=29+29) Compiler 4.40M ± 0% 4.39M ± 0% -0.13% (p=0.000 n=29+30) Passes toolstash/buildall. Change-Id: Iba4ee765dd1748f67e52fcade1cd75c9f6e13fa9 Reviewed-on: https://go-review.googlesource.com/30974 Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-10-12 15:48:18 -07:00
OSTRUCTKEY,
OLEN,
OMUL,
OLSH,
ORSH,
OAND,
OANDNOT,
ONEW,
ONOT,
OCOM,
OPLUS,
OMINUS,
OOROR,
OPAREN,
ORUNESTR,
OREAL,
OIMAG,
OCOMPLEX:
break
// Discardable as long as we know it's not division by zero.
case ODIV, OMOD:
if Isconst(n.Right, CTINT) && n.Right.Val().U.(*Mpint).CmpInt64(0) != 0 {
break
}
if Isconst(n.Right, CTFLT) && n.Right.Val().U.(*Mpflt).CmpFloat64(0) != 0 {
break
}
return false
// Discardable as long as we know it won't fail because of a bad size.
case OMAKECHAN, OMAKEMAP:
if Isconst(n.Left, CTINT) && n.Left.Val().U.(*Mpint).CmpInt64(0) == 0 {
break
}
return false
// Difficult to tell what sizes are okay.
case OMAKESLICE:
return false
}
if !candiscard(n.Left) || !candiscard(n.Right) || !candiscardlist(n.Ninit) || !candiscardlist(n.Nbody) || !candiscardlist(n.List) || !candiscardlist(n.Rlist) {
return false
}
return true
}
// rewrite
// print(x, y, z)
// into
// func(a1, a2, a3) {
// print(a1, a2, a3)
// }(x, y, z)
// and same for println.
var walkprintfunc_prgen int
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
// The result of walkprintfunc MUST be assigned back to n, e.g.
// n.Left = walkprintfunc(n.Left, init)
func walkprintfunc(n *Node, init *Nodes) *Node {
if n.Ninit.Len() != 0 {
walkstmtlist(n.Ninit.Slice())
init.AppendNodes(&n.Ninit)
}
t := nod(OTFUNC, nil, nil)
var printargs []*Node
for i, n1 := range n.List.Slice() {
buf := fmt.Sprintf("a%d", i)
a := namedfield(buf, n1.Type)
t.List.Append(a)
printargs = append(printargs, a.Left)
}
oldfn := Curfn
Curfn = nil
walkprintfunc_prgen++
sym := lookupN("print·%d", walkprintfunc_prgen)
fn := dclfunc(sym, t)
a := nod(n.Op, nil, nil)
a.List.Set(printargs)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
a = walkstmt(a)
fn.Nbody.Set1(a)
funcbody()
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
fn = typecheck(fn, Etop)
typecheckslice(fn.Nbody.Slice(), Etop)
xtop = append(xtop, fn)
Curfn = oldfn
a = nod(OCALL, nil, nil)
a.Left = fn.Func.Nname
a.List.Set(n.List.Slice())
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
a = walkexpr(a, init)
return a
}
// substArgTypes substitutes the given list of types for
// successive occurrences of the "any" placeholder in the
// type syntax expression n.Type.
// The result of substArgTypes MUST be assigned back to old, e.g.
// n.Left = substArgTypes(n.Left, t1, t2)
func substArgTypes(old *Node, types_ ...*types.Type) *Node {
n := *old // make shallow copy
for _, t := range types_ {
dowidth(t)
}
n.Type = types.SubstAny(n.Type, &types_)
if len(types_) > 0 {
Fatalf("substArgTypes: too many argument types")
}
return &n
}