2016-03-01 22:57:46 +00:00
|
|
|
// Copyright 2012 The Go Authors. All rights reserved.
|
2015-02-13 14:40:36 -05:00
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import (
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
"cmd/compile/internal/types"
|
2016-12-07 16:02:42 -08:00
|
|
|
"cmd/internal/src"
|
2015-02-13 14:40:36 -05:00
|
|
|
"fmt"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Rewrite tree to use separate statements to enforce
|
2016-03-01 23:21:55 +00:00
|
|
|
// order of evaluation. Makes walk easier, because it
|
2015-02-13 14:40:36 -05:00
|
|
|
// can (after this runs) reorder at will within an expression.
|
|
|
|
|
//
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
// Rewrite m[k] op= r into m[k] = m[k] op r if op is / or %.
|
2015-02-13 14:40:36 -05:00
|
|
|
//
|
|
|
|
|
// Introduce temporaries as needed by runtime routines.
|
|
|
|
|
// For example, the map runtime routines take the map key
|
|
|
|
|
// by reference, so make sure all map keys are addressable
|
|
|
|
|
// by copying them to temporaries as needed.
|
|
|
|
|
// The same is true for channel operations.
|
|
|
|
|
//
|
|
|
|
|
// Arrange that map index expressions only appear in direct
|
|
|
|
|
// assignments x = m[k] or m[k] = x, never in larger expressions.
|
|
|
|
|
//
|
|
|
|
|
// Arrange that receive expressions only appear in direct assignments
|
|
|
|
|
// x = <-c or as standalone statements <-c, never in larger expressions.
|
|
|
|
|
|
|
|
|
|
// TODO(rsc): The temporary introduction during multiple assignments
|
|
|
|
|
// should be moved into this file, so that the temporaries can be cleaned
|
|
|
|
|
// and so that conversions implicit in the OAS2FUNC and OAS2RECV
|
|
|
|
|
// nodes can be made explicit and then have their temporaries cleaned.
|
|
|
|
|
|
|
|
|
|
// TODO(rsc): Goto and multilevel break/continue can jump over
|
|
|
|
|
// inserted VARKILL annotations. Work out a way to handle these.
|
|
|
|
|
// The current implementation is safe, in that it will execute correctly.
|
|
|
|
|
// But it won't reuse temporaries as aggressively as it might, and
|
|
|
|
|
// it can result in unnecessary zeroing of those variables in the function
|
|
|
|
|
// prologue.
|
|
|
|
|
|
|
|
|
|
// Order holds state during the ordering process.
|
|
|
|
|
type Order struct {
|
2016-02-27 14:31:33 -08:00
|
|
|
out []*Node // list of generated statements
|
|
|
|
|
temp []*Node // stack of temporary variables
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// Order rewrites fn.Nbody to apply the ordering constraints
|
2015-02-13 14:40:36 -05:00
|
|
|
// described in the comment at the top of the file.
|
|
|
|
|
func order(fn *Node) {
|
|
|
|
|
if Debug['W'] > 1 {
|
2015-05-27 10:42:55 -04:00
|
|
|
s := fmt.Sprintf("\nbefore order %v", fn.Func.Nname.Sym)
|
2016-03-04 13:16:48 -08:00
|
|
|
dumplist(s, fn.Nbody)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
orderBlock(&fn.Nbody)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// newTemp allocates a new temporary with the given type,
|
2015-02-13 14:40:36 -05:00
|
|
|
// pushes it onto the temp stack, and returns it.
|
2018-03-04 17:17:55 -08:00
|
|
|
// If clear is true, newTemp emits code to zero the temporary.
|
|
|
|
|
func (o *Order) newTemp(t *types.Type, clear bool) *Node {
|
|
|
|
|
v := temp(t)
|
2015-02-17 22:13:49 -05:00
|
|
|
if clear {
|
2018-03-04 17:17:55 -08:00
|
|
|
a := nod(OAS, v, nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
a = typecheck(a, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, a)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.temp = append(o.temp, v)
|
|
|
|
|
return v
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// copyExpr behaves like ordertemp but also emits
|
2015-02-13 14:40:36 -05:00
|
|
|
// code to initialize the temporary to the value n.
|
|
|
|
|
//
|
|
|
|
|
// The clear argument is provided for use when the evaluation
|
|
|
|
|
// of tmp = n turns into a function call that is passed a pointer
|
|
|
|
|
// to the temporary as the output space. If the call blocks before
|
|
|
|
|
// tmp has been written, the garbage collector will still treat the
|
|
|
|
|
// temporary as live, so we must zero it before entering that call.
|
|
|
|
|
// Today, this only happens for channel receive operations.
|
|
|
|
|
// (The other candidate would be map access, but map access
|
|
|
|
|
// returns a pointer to the result data instead of taking a pointer
|
|
|
|
|
// to be filled in.)
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) copyExpr(n *Node, t *types.Type, clear bool) *Node {
|
|
|
|
|
v := o.newTemp(t, clear)
|
|
|
|
|
a := nod(OAS, v, n)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
a = typecheck(a, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, a)
|
|
|
|
|
return v
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// cheapExpr returns a cheap version of n.
|
2015-02-13 14:40:36 -05:00
|
|
|
// The definition of cheap is that n is a variable or constant.
|
2018-03-04 17:17:55 -08:00
|
|
|
// If not, cheapExpr allocates a new tmp, emits tmp = n,
|
2015-02-13 14:40:36 -05:00
|
|
|
// and then returns tmp.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) cheapExpr(n *Node) *Node {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
if n == nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Op {
|
2015-04-01 09:38:44 -07:00
|
|
|
case ONAME, OLITERAL:
|
2015-02-13 14:40:36 -05:00
|
|
|
return n
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
case OLEN, OCAP:
|
2018-03-04 17:17:55 -08:00
|
|
|
l := o.cheapExpr(n.Left)
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
if l == n.Left {
|
|
|
|
|
return n
|
|
|
|
|
}
|
2017-10-23 19:57:07 +01:00
|
|
|
a := n.copy()
|
|
|
|
|
a.Orig = a
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
a.Left = l
|
2017-10-23 19:57:07 +01:00
|
|
|
return typecheck(a, Erv)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
return o.copyExpr(n, n.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// safeExpr returns a safe version of n.
|
2015-02-13 14:40:36 -05:00
|
|
|
// The definition of safe is that n can appear multiple times
|
|
|
|
|
// without violating the semantics of the original program,
|
|
|
|
|
// and that assigning to the safe version has the same effect
|
|
|
|
|
// as assigning to the original n.
|
|
|
|
|
//
|
|
|
|
|
// The intended use is to apply to x when rewriting x += y into x = x + y.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) safeExpr(n *Node) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Op {
|
2015-04-01 09:38:44 -07:00
|
|
|
case ONAME, OLITERAL:
|
2015-02-13 14:40:36 -05:00
|
|
|
return n
|
|
|
|
|
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
case ODOT, OLEN, OCAP:
|
2018-03-04 17:17:55 -08:00
|
|
|
l := o.safeExpr(n.Left)
|
2015-02-13 14:40:36 -05:00
|
|
|
if l == n.Left {
|
|
|
|
|
return n
|
|
|
|
|
}
|
2017-10-23 19:57:07 +01:00
|
|
|
a := n.copy()
|
|
|
|
|
a.Orig = a
|
2015-02-13 14:40:36 -05:00
|
|
|
a.Left = l
|
2017-10-23 19:57:07 +01:00
|
|
|
return typecheck(a, Erv)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case ODOTPTR, OIND:
|
2018-03-04 17:17:55 -08:00
|
|
|
l := o.cheapExpr(n.Left)
|
2015-02-13 14:40:36 -05:00
|
|
|
if l == n.Left {
|
|
|
|
|
return n
|
|
|
|
|
}
|
2017-10-23 19:57:07 +01:00
|
|
|
a := n.copy()
|
|
|
|
|
a.Orig = a
|
2015-02-13 14:40:36 -05:00
|
|
|
a.Left = l
|
2017-10-23 19:57:07 +01:00
|
|
|
return typecheck(a, Erv)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OINDEX, OINDEXMAP:
|
2015-02-23 16:07:24 -05:00
|
|
|
var l *Node
|
2016-03-30 14:45:47 -07:00
|
|
|
if n.Left.Type.IsArray() {
|
2018-03-04 17:17:55 -08:00
|
|
|
l = o.safeExpr(n.Left)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2018-03-04 17:17:55 -08:00
|
|
|
l = o.cheapExpr(n.Left)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
r := o.cheapExpr(n.Right)
|
2015-02-13 14:40:36 -05:00
|
|
|
if l == n.Left && r == n.Right {
|
|
|
|
|
return n
|
|
|
|
|
}
|
2017-10-23 19:57:07 +01:00
|
|
|
a := n.copy()
|
|
|
|
|
a.Orig = a
|
2015-02-13 14:40:36 -05:00
|
|
|
a.Left = l
|
|
|
|
|
a.Right = r
|
2017-10-23 19:57:07 +01:00
|
|
|
return typecheck(a, Erv)
|
2018-03-04 17:17:55 -08:00
|
|
|
|
2016-03-23 16:01:15 +11:00
|
|
|
default:
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("ordersafeexpr %v", n.Op)
|
2016-03-23 16:01:15 +11:00
|
|
|
return nil // not reached
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Isaddrokay reports whether it is okay to pass n's address to runtime routines.
|
|
|
|
|
// Taking the address of a variable makes the liveness and optimization analyses
|
|
|
|
|
// lose track of where the variable's lifetime ends. To avoid hurting the analyses
|
|
|
|
|
// of ordinary stack variables, those are not 'isaddrokay'. Temporaries are okay,
|
|
|
|
|
// because we emit explicit VARKILL instructions marking the end of those
|
|
|
|
|
// temporaries' lifetimes.
|
2015-02-17 22:13:49 -05:00
|
|
|
func isaddrokay(n *Node) bool {
|
2017-04-25 18:14:12 -07:00
|
|
|
return islvalue(n) && (n.Op != ONAME || n.Class() == PEXTERN || n.IsAutoTmp())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// addrTemp ensures that n is okay to pass by address to runtime routines.
|
|
|
|
|
// If the original argument n is not okay, addrTemp creates a tmp, emits
|
2016-10-11 08:36:38 -07:00
|
|
|
// tmp = n, and then returns tmp.
|
2018-03-04 17:17:55 -08:00
|
|
|
// The result of addrTemp MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = o.addrTemp(n.Left)
|
|
|
|
|
func (o *Order) addrTemp(n *Node) *Node {
|
2017-10-10 17:36:03 +01:00
|
|
|
if consttype(n) > 0 {
|
cmd/compile: convert constants to interfaces without allocating
The order pass is responsible for ensuring that
values passed to runtime functions, including
convT2E/convT2I, are addressable.
Prior to this CL, this was always accomplished
by creating a temp, which frequently escaped to
the heap, causing allocations, perhaps most
notably in code like:
fmt.Println(1, 2, 3) // allocates three times
None of the runtime routines modify the contents
of the pointers they receive, so in the case of
constants, instead of creating a temp value,
we can create a static value.
(Marking the static value as read-only provides
protection against accidental attempts by the runtime
to modify the constant data.)
This improves code generation for code like:
panic("abc")
c <- 2 // c is a chan int
which can now simply refer to "abc" and 2,
rather than going by way of a temporary.
It also allows us to optimize convT2E/convT2I,
by recognizing static readonly values
and directly constructing the interface.
This CL adds ~0.5% to binary size, despite
decreasing the size of many functions,
because it also adds many static symbols.
This binary size regression could be recovered in
future (but currently unplanned) work.
There is a lot of content-duplication in these
symbols; this statement generates six new symbols,
three containing an int 1 and three containing
a pointer to the string "a":
fmt.Println(1, 1, 1, "a", "a", "a")
These symbols could be made content-addressable.
Furthermore, these symbols are small, so the
alignment and naming overhead is large.
As with the go.strings section, these symbols
could be hidden and have their alignment reduced.
The changes to test/live.go make it impossible
(at least with current optimization techniques)
to place the values being passed to the runtime
in static symbols, preserving autotmp creation.
Fixes #18704
Benchmarks from fmt and go-kit's logging package:
github.com/go-kit/kit/log
name old time/op new time/op delta
JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10)
JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10)
Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9)
OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10)
TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9)
TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10)
LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9)
LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10)
NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10)
NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10)
ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10)
ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10)
JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10)
Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10)
OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10)
TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10)
TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10)
ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10)
JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10)
Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10)
ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
fmt
name old time/op new time/op delta
SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7)
SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10)
SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10)
SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10)
SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10)
SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9)
SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8)
SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10)
SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10)
SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10)
SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10)
SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9)
SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10)
SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10)
SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10)
SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10)
ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10)
FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10)
FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10)
FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10)
ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8)
ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10)
ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9)
name old alloc/op new alloc/op delta
SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfEmpty-8 0.00B 0.00B ~ (all equal)
SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10)
SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10)
SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10)
SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10)
SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10)
SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal)
SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal)
SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal)
ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10)
FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal)
ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10)
ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal)
name old allocs/op new allocs/op delta
SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfEmpty-8 0.00 0.00 ~ (all equal)
SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal)
SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal)
ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00 0.00 ~ (all equal)
ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal)
ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5
Reviewed-on: https://go-review.googlesource.com/35554
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
|
|
|
// TODO: expand this to all static composite literal nodes?
|
|
|
|
|
n = defaultlit(n, nil)
|
|
|
|
|
dowidth(n.Type)
|
|
|
|
|
vstat := staticname(n.Type)
|
2017-02-27 19:56:38 +02:00
|
|
|
vstat.Name.SetReadonly(true)
|
cmd/compile: convert constants to interfaces without allocating
The order pass is responsible for ensuring that
values passed to runtime functions, including
convT2E/convT2I, are addressable.
Prior to this CL, this was always accomplished
by creating a temp, which frequently escaped to
the heap, causing allocations, perhaps most
notably in code like:
fmt.Println(1, 2, 3) // allocates three times
None of the runtime routines modify the contents
of the pointers they receive, so in the case of
constants, instead of creating a temp value,
we can create a static value.
(Marking the static value as read-only provides
protection against accidental attempts by the runtime
to modify the constant data.)
This improves code generation for code like:
panic("abc")
c <- 2 // c is a chan int
which can now simply refer to "abc" and 2,
rather than going by way of a temporary.
It also allows us to optimize convT2E/convT2I,
by recognizing static readonly values
and directly constructing the interface.
This CL adds ~0.5% to binary size, despite
decreasing the size of many functions,
because it also adds many static symbols.
This binary size regression could be recovered in
future (but currently unplanned) work.
There is a lot of content-duplication in these
symbols; this statement generates six new symbols,
three containing an int 1 and three containing
a pointer to the string "a":
fmt.Println(1, 1, 1, "a", "a", "a")
These symbols could be made content-addressable.
Furthermore, these symbols are small, so the
alignment and naming overhead is large.
As with the go.strings section, these symbols
could be hidden and have their alignment reduced.
The changes to test/live.go make it impossible
(at least with current optimization techniques)
to place the values being passed to the runtime
in static symbols, preserving autotmp creation.
Fixes #18704
Benchmarks from fmt and go-kit's logging package:
github.com/go-kit/kit/log
name old time/op new time/op delta
JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10)
JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10)
Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9)
OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10)
TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9)
TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10)
LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9)
LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10)
NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10)
NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10)
ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10)
ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10)
JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10)
Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10)
OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10)
TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10)
TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10)
ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10)
JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10)
Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10)
ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
fmt
name old time/op new time/op delta
SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7)
SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10)
SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10)
SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10)
SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10)
SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9)
SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8)
SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10)
SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10)
SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10)
SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10)
SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9)
SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10)
SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10)
SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10)
SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10)
ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10)
FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10)
FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10)
FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10)
ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8)
ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10)
ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9)
name old alloc/op new alloc/op delta
SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfEmpty-8 0.00B 0.00B ~ (all equal)
SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10)
SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10)
SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10)
SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10)
SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10)
SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal)
SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal)
SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal)
ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10)
FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal)
ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10)
ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal)
name old allocs/op new allocs/op delta
SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfEmpty-8 0.00 0.00 ~ (all equal)
SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal)
SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal)
ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00 0.00 ~ (all equal)
ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal)
ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5
Reviewed-on: https://go-review.googlesource.com/35554
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
|
|
|
var out []*Node
|
|
|
|
|
staticassign(vstat, n, &out)
|
|
|
|
|
if out != nil {
|
|
|
|
|
Fatalf("staticassign of const generated code: %+v", n)
|
|
|
|
|
}
|
|
|
|
|
vstat = typecheck(vstat, Erv)
|
|
|
|
|
return vstat
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
if isaddrokay(n) {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
return o.copyExpr(n, n.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// mapKeyTemp prepares n to be a key in a map runtime call and returns n.
|
2017-03-14 11:11:28 -07:00
|
|
|
// It should only be used for map runtime calls which have *_fast* versions.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) mapKeyTemp(t *types.Type, n *Node) *Node {
|
2017-02-09 14:00:23 -08:00
|
|
|
// Most map calls need to take the address of the key.
|
2017-03-14 11:11:28 -07:00
|
|
|
// Exception: map*_fast* calls. See golang.org/issue/19015.
|
|
|
|
|
if mapfast(t) == mapslow {
|
2018-03-04 17:17:55 -08:00
|
|
|
return o.addrTemp(n)
|
2017-02-09 14:00:23 -08:00
|
|
|
}
|
2017-03-14 11:11:28 -07:00
|
|
|
return n
|
2017-02-09 14:00:23 -08:00
|
|
|
}
|
|
|
|
|
|
2016-02-20 18:49:22 -08:00
|
|
|
type ordermarker int
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// markTemp returns the top of the temporary variable stack.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) markTemp() ordermarker {
|
|
|
|
|
return ordermarker(len(o.temp))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// popTemp pops temporaries off the stack until reaching the mark,
|
|
|
|
|
// which must have been returned by markTemp.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) popTemp(mark ordermarker) {
|
|
|
|
|
o.temp = o.temp[:mark]
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-01-21 10:25:07 +01:00
|
|
|
// cleanTempNoPop emits VARKILL and if needed VARLIVE instructions
|
|
|
|
|
// into the returned list for each temporary above the mark on the temporary stack.
|
|
|
|
|
// It does not pop the temporaries from the stack.
|
2018-03-22 12:49:48 -07:00
|
|
|
func (o *Order) cleanTempNoPop(mark ordermarker) []*Node {
|
|
|
|
|
var out []*Node
|
2018-03-04 17:17:55 -08:00
|
|
|
for i := len(o.temp) - 1; i >= int(mark); i-- {
|
|
|
|
|
n := o.temp[i]
|
2017-02-27 19:56:38 +02:00
|
|
|
if n.Name.Keepalive() {
|
|
|
|
|
n.Name.SetKeepalive(false)
|
|
|
|
|
n.SetAddrtaken(true) // ensure SSA keeps the n variable
|
2018-01-21 10:25:07 +01:00
|
|
|
live := nod(OVARLIVE, n, nil)
|
|
|
|
|
live = typecheck(live, Etop)
|
2018-03-22 12:49:48 -07:00
|
|
|
out = append(out, live)
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
}
|
2017-10-11 10:14:31 +01:00
|
|
|
kill := nod(OVARKILL, n, nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
kill = typecheck(kill, Etop)
|
2018-03-22 12:49:48 -07:00
|
|
|
out = append(out, kill)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-22 12:49:48 -07:00
|
|
|
return out
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// cleanTemp emits VARKILL instructions for each temporary above the
|
2015-02-13 14:40:36 -05:00
|
|
|
// mark on the temporary stack and removes them from the stack.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) cleanTemp(top ordermarker) {
|
2018-03-22 12:49:48 -07:00
|
|
|
o.out = append(o.out, o.cleanTempNoPop(top)...)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.popTemp(top)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// stmtList orders each of the statements in the list.
|
|
|
|
|
func (o *Order) stmtList(l Nodes) {
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n := range l.Slice() {
|
2018-03-04 17:17:55 -08:00
|
|
|
o.stmt(n)
|
2016-02-27 14:31:33 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// orderBlock orders the block of statements in n into a new slice,
|
2016-02-27 14:31:33 -08:00
|
|
|
// and then replaces the old slice in n with the new slice.
|
2018-03-04 17:17:55 -08:00
|
|
|
func orderBlock(n *Nodes) {
|
2016-02-27 14:31:33 -08:00
|
|
|
var order Order
|
2018-03-04 17:17:55 -08:00
|
|
|
mark := order.markTemp()
|
|
|
|
|
order.stmtList(*n)
|
|
|
|
|
order.cleanTemp(mark)
|
2016-02-27 14:31:33 -08:00
|
|
|
n.Set(order.out)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// exprInPlace orders the side effects in n and
|
2015-02-13 14:40:36 -05:00
|
|
|
// leaves them as the init list of the returned node.
|
2018-03-04 17:17:55 -08:00
|
|
|
// The result of exprInPlace MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = o.exprInPlace(n.Left)
|
|
|
|
|
func (o *Order) exprInPlace(n *Node) *Node {
|
2015-03-02 14:22:05 -05:00
|
|
|
var order Order
|
2018-03-04 17:17:55 -08:00
|
|
|
n = order.expr(n, nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = addinit(n, order.out)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// insert new temporaries from order
|
|
|
|
|
// at the top of the outer temporary stack.
|
2018-03-04 17:17:55 -08:00
|
|
|
o.temp = append(o.temp, order.temp...)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// orderStmtInPlace orders the side effects of the single statement n
|
2015-02-13 14:40:36 -05:00
|
|
|
// and replaces it with the resulting statement list.
|
2018-03-04 17:17:55 -08:00
|
|
|
// The result of orderStmtInPlace MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = orderStmtInPlace(n.Left)
|
|
|
|
|
func orderStmtInPlace(n *Node) *Node {
|
2015-03-02 14:22:05 -05:00
|
|
|
var order Order
|
2018-03-04 17:17:55 -08:00
|
|
|
mark := order.markTemp()
|
|
|
|
|
order.stmt(n)
|
|
|
|
|
order.cleanTemp(mark)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return liststmt(order.out)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// init moves n's init list to o.out.
|
|
|
|
|
func (o *Order) init(n *Node) {
|
2017-03-28 07:12:57 -07:00
|
|
|
if n.mayBeShared() {
|
|
|
|
|
// For concurrency safety, don't mutate potentially shared nodes.
|
|
|
|
|
// First, ensure that no work is required here.
|
|
|
|
|
if n.Ninit.Len() > 0 {
|
|
|
|
|
Fatalf("orderinit shared node with ninit")
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.stmtList(n.Ninit)
|
2016-03-08 15:10:26 -08:00
|
|
|
n.Ninit.Set(nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ismulticall reports whether the list l is f() for a multi-value function.
|
|
|
|
|
// Such an f() could appear as the lone argument to a multi-arg function.
|
2016-03-08 10:26:20 -08:00
|
|
|
func ismulticall(l Nodes) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
// one arg only
|
2016-03-08 15:10:26 -08:00
|
|
|
if l.Len() != 1 {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-08 15:10:26 -08:00
|
|
|
n := l.First()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// must be call
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-04-01 09:38:44 -07:00
|
|
|
case OCALLFUNC, OCALLMETH, OCALLINTER:
|
2018-04-03 13:17:28 +01:00
|
|
|
// call must return multiple values
|
|
|
|
|
return n.Left.Type.NumResults() > 1
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// copyRet emits t1, t2, ... = n, where n is a function call,
|
2015-02-13 14:40:36 -05:00
|
|
|
// and then returns the list t1, t2, ....
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) copyRet(n *Node) []*Node {
|
2016-04-05 16:44:07 -07:00
|
|
|
if !n.Type.IsFuncArgStruct() {
|
2017-05-02 09:16:22 -07:00
|
|
|
Fatalf("copyret %v %d", n.Type, n.Left.Type.NumResults())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
var l1, l2 []*Node
|
|
|
|
|
for _, f := range n.Type.Fields().Slice() {
|
|
|
|
|
tmp := temp(f.Type)
|
2016-03-08 10:26:20 -08:00
|
|
|
l1 = append(l1, tmp)
|
|
|
|
|
l2 = append(l2, tmp)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
as := nod(OAS2, nil, nil)
|
2016-03-08 10:26:20 -08:00
|
|
|
as.List.Set(l1)
|
2016-03-10 10:13:42 -08:00
|
|
|
as.Rlist.Set1(n)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
as = typecheck(as, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.stmt(as)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-09 20:29:21 -08:00
|
|
|
return l2
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// callArgs orders the list of call arguments *l.
|
|
|
|
|
func (o *Order) callArgs(l *Nodes) {
|
2016-03-09 20:29:21 -08:00
|
|
|
if ismulticall(*l) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// return f() where f() is multiple values.
|
2018-03-04 17:17:55 -08:00
|
|
|
l.Set(o.copyRet(l.First()))
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2018-03-04 17:17:55 -08:00
|
|
|
o.exprList(*l)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// call orders the call expression n.
|
|
|
|
|
// n.Op is OCALLMETH/OCALLFUNC/OCALLINTER or a builtin like OCOPY.
|
|
|
|
|
func (o *Order) call(n *Node) {
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil) // ODDDARG temp
|
|
|
|
|
o.callArgs(&n.List)
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
if n.Op != OCALLFUNC {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
keepAlive := func(i int) {
|
|
|
|
|
// If the argument is really a pointer being converted to uintptr,
|
|
|
|
|
// arrange for the pointer to be kept alive until the call returns,
|
|
|
|
|
// by copying it into a temp and marking that temp
|
|
|
|
|
// still alive when we pop the temp stack.
|
|
|
|
|
xp := n.List.Addr(i)
|
|
|
|
|
for (*xp).Op == OCONVNOP && !(*xp).Type.IsUnsafePtr() {
|
|
|
|
|
xp = &(*xp).Left
|
|
|
|
|
}
|
|
|
|
|
x := *xp
|
|
|
|
|
if x.Type.IsUnsafePtr() {
|
|
|
|
|
x = o.copyExpr(x, x.Type, false)
|
|
|
|
|
x.Name.SetKeepalive(true)
|
|
|
|
|
*xp = x
|
2017-04-05 19:38:21 -07:00
|
|
|
}
|
2018-04-03 13:17:28 +01:00
|
|
|
}
|
2017-04-05 19:38:21 -07:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
for i, t := range n.Left.Type.Params().FieldSlice() {
|
|
|
|
|
// Check for "unsafe-uintptr" tag provided by escape analysis.
|
|
|
|
|
if t.Isddd() && !n.Isddd() {
|
|
|
|
|
if t.Note == uintptrEscapesTag {
|
|
|
|
|
for ; i < n.List.Len(); i++ {
|
2017-04-05 19:38:21 -07:00
|
|
|
keepAlive(i)
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
}
|
|
|
|
|
}
|
2018-04-03 13:17:28 +01:00
|
|
|
} else {
|
|
|
|
|
if t.Note == unsafeUintptrTag || t.Note == uintptrEscapesTag {
|
|
|
|
|
keepAlive(i)
|
|
|
|
|
}
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// mapAssign appends n to o.out, introducing temporaries
|
2016-10-11 08:36:38 -07:00
|
|
|
// to make sure that all map assignments have the form m[k] = x.
|
2018-03-04 17:17:55 -08:00
|
|
|
// (Note: expr has already been called on n, so we know k is addressable.)
|
2015-02-13 14:40:36 -05:00
|
|
|
//
|
2017-12-04 14:47:32 -08:00
|
|
|
// If n is the multiple assignment form ..., m[k], ... = ..., x, ..., the rewrite is
|
2015-02-13 14:40:36 -05:00
|
|
|
// t1 = m
|
|
|
|
|
// t2 = k
|
2017-12-04 14:47:32 -08:00
|
|
|
// ...., t3, ... = ..., x, ...
|
2015-02-13 14:40:36 -05:00
|
|
|
// t1[t2] = t3
|
|
|
|
|
//
|
|
|
|
|
// The temporaries t1, t2 are needed in case the ... being assigned
|
|
|
|
|
// contain m or k. They are usually unnecessary, but in the unnecessary
|
|
|
|
|
// cases they are also typically registerizable, so not much harm done.
|
|
|
|
|
// And this only applies to the multiple-assignment form.
|
2015-10-22 09:51:12 +09:00
|
|
|
// We could do a more precise analysis if needed, like in walk.go.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) mapAssign(n *Node) {
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("ordermapassign %v", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
case OAS, OASOP:
|
2017-12-04 14:47:32 -08:00
|
|
|
if n.Left.Op == OINDEXMAP {
|
|
|
|
|
// Make sure we evaluate the RHS before starting the map insert.
|
|
|
|
|
// We need to make sure the RHS won't panic. See issue 22881.
|
cmd/compile: avoid mapaccess at m[k]=append(m[k]..
Currently rvalue m[k] is transformed during walk into:
tmp1 := *mapaccess(m, k)
tmp2 := append(tmp1, ...)
*mapassign(m, k) = tmp2
However, this is suboptimal, as we could instead produce just:
tmp := mapassign(m, k)
*tmp := append(*tmp, ...)
Optimization is possible only if during Order it may tell that m[k] is
exactly the same at left and right part of assignment. It doesn't work:
1) m[f(k)] = append(m[f(k)], ...)
2) sink, m[k] = sink, append(m[k]...)
3) m[k] = append(..., m[k],...)
Benchmark:
name old time/op new time/op delta
MapAppendAssign/Int32/256-8 33.5ns ± 3% 22.4ns ±10% -33.24% (p=0.000 n=16+18)
MapAppendAssign/Int32/65536-8 68.2ns ± 6% 48.5ns ±29% -28.90% (p=0.000 n=20+20)
MapAppendAssign/Int64/256-8 34.3ns ± 4% 23.3ns ± 5% -32.23% (p=0.000 n=17+18)
MapAppendAssign/Int64/65536-8 65.9ns ± 7% 61.2ns ±19% -7.06% (p=0.002 n=18+20)
MapAppendAssign/Str/256-8 116ns ±12% 79ns ±16% -31.70% (p=0.000 n=20+19)
MapAppendAssign/Str/65536-8 134ns ±15% 111ns ±45% -16.95% (p=0.000 n=19+20)
name old alloc/op new alloc/op delta
MapAppendAssign/Int32/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=19+18)
MapAppendAssign/Int32/65536-8 27.0B ± 0% 20.7B ±30% -23.33% (p=0.000 n=20+20)
MapAppendAssign/Int64/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=20+17)
MapAppendAssign/Int64/65536-8 27.0B ± 0% 27.0B ± 0% ~ (all equal)
MapAppendAssign/Str/256-8 94.0B ± 0% 78.0B ± 0% -17.02% (p=0.000 n=20+16)
MapAppendAssign/Str/65536-8 54.0B ± 0% 54.0B ± 0% ~ (all equal)
Fixes #24364
Updates #5147
Change-Id: Id257d052b75b9a445b4885dc571bf06ce6f6b409
Reviewed-on: https://go-review.googlesource.com/100838
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-03-14 01:16:43 -07:00
|
|
|
if n.Right.Op == OAPPEND {
|
|
|
|
|
s := n.Right.List.Slice()[1:]
|
|
|
|
|
for i, n := range s {
|
|
|
|
|
s[i] = o.cheapExpr(n)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
n.Right = o.cheapExpr(n.Right)
|
|
|
|
|
}
|
2017-12-04 14:47:32 -08:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OAS2, OAS2DOTTYPE, OAS2MAPR, OAS2FUNC:
|
2016-02-27 14:31:33 -08:00
|
|
|
var post []*Node
|
2017-10-11 10:14:31 +01:00
|
|
|
for i, m := range n.List.Slice() {
|
|
|
|
|
switch {
|
|
|
|
|
case m.Op == OINDEXMAP:
|
2016-10-28 13:33:57 -04:00
|
|
|
if !m.Left.IsAutoTmp() {
|
2018-03-04 17:17:55 -08:00
|
|
|
m.Left = o.copyExpr(m.Left, m.Left.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-28 13:33:57 -04:00
|
|
|
if !m.Right.IsAutoTmp() {
|
2018-03-04 17:17:55 -08:00
|
|
|
m.Right = o.copyExpr(m.Right, m.Right.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-10-11 10:14:31 +01:00
|
|
|
fallthrough
|
2018-04-08 13:39:10 +01:00
|
|
|
case instrumenting && n.Op == OAS2FUNC && !m.isBlank():
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.newTemp(m.Type, false)
|
2017-10-11 10:14:31 +01:00
|
|
|
n.List.SetIndex(i, t)
|
|
|
|
|
a := nod(OAS, m, t)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
a = typecheck(a, Etop)
|
2016-02-27 14:31:33 -08:00
|
|
|
post = append(post, a)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.out = append(o.out, post...)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// stmt orders the statement n, appending to o.out.
|
2015-02-13 14:40:36 -05:00
|
|
|
// Temporaries created during the statement are cleaned
|
|
|
|
|
// up using VARKILL instructions where possible.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) stmt(n *Node) {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-02 17:34:42 -08:00
|
|
|
lno := setlineno(n)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.init(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("orderstmt %v", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
case OVARKILL, OVARLIVE:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x
This slows down more things than I expected, but it also speeds things up,
and it reduces stack frame sizes and the load on the optimizer, so it's still
likely a net win.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795)
BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000)
BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000)
BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000)
BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000)
BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013)
BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039)
BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104)
BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035)
BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131)
BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572)
BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000)
BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000)
BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924)
BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242)
BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000)
BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000)
BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000)
BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000)
BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000)
BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791)
BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110)
BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433)
BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303)
BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001)
BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000)
Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f
Reviewed-on: https://go-review.googlesource.com/9721
Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
|
|
|
case OAS:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, n.Left)
|
|
|
|
|
o.mapAssign(n)
|
|
|
|
|
o.cleanTemp(t)
|
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x
This slows down more things than I expected, but it also speeds things up,
and it reduces stack frame sizes and the load on the optimizer, so it's still
likely a net win.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795)
BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000)
BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000)
BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000)
BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000)
BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013)
BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039)
BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104)
BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035)
BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131)
BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572)
BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000)
BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000)
BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924)
BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242)
BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000)
BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000)
BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000)
BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000)
BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000)
BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791)
BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110)
BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433)
BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303)
BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001)
BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000)
Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f
Reviewed-on: https://go-review.googlesource.com/9721
Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
|
|
|
|
|
|
|
|
case OAS2,
|
2015-02-13 14:40:36 -05:00
|
|
|
OCLOSE,
|
|
|
|
|
OCOPY,
|
|
|
|
|
OPRINT,
|
|
|
|
|
OPRINTN,
|
|
|
|
|
ORECOVER,
|
|
|
|
|
ORECV:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
|
|
|
|
o.exprList(n.List)
|
|
|
|
|
o.exprList(n.Rlist)
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Op {
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
case OAS2:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.mapAssign(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
default:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OASOP:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
2017-08-29 15:40:32 -07:00
|
|
|
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
if instrumenting || n.Left.Op == OINDEXMAP && (n.SubOp() == ODIV || n.SubOp() == OMOD) {
|
|
|
|
|
// Rewrite m[k] op= r into m[k] = m[k] op r so
|
|
|
|
|
// that we can ensure that if op panics
|
|
|
|
|
// because r is zero, the panic happens before
|
|
|
|
|
// the map assignment.
|
|
|
|
|
|
|
|
|
|
n.Left = o.safeExpr(n.Left)
|
|
|
|
|
|
|
|
|
|
l := treecopy(n.Left, src.NoXPos)
|
|
|
|
|
if l.Op == OINDEXMAP {
|
|
|
|
|
l.SetIndexMapLValue(false)
|
|
|
|
|
}
|
|
|
|
|
l = o.copyExpr(l, n.Left.Type, false)
|
|
|
|
|
n.Right = nod(n.SubOp(), l, n.Right)
|
|
|
|
|
n.Right = typecheck(n.Right, Erv)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
|
|
|
|
|
|
|
|
|
n.Op = OAS
|
|
|
|
|
n.ResetAux()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.mapAssign(n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-02-09 14:00:23 -08:00
|
|
|
// Special: make sure key is addressable if needed,
|
2015-02-13 14:40:36 -05:00
|
|
|
// and make sure OINDEXMAP is not copied out.
|
|
|
|
|
case OAS2MAPR:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
o.exprList(n.List)
|
2016-03-08 15:10:26 -08:00
|
|
|
r := n.Rlist.First()
|
2018-03-04 17:17:55 -08:00
|
|
|
r.Left = o.expr(r.Left, nil)
|
|
|
|
|
r.Right = o.expr(r.Right, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// See case OINDEXMAP below.
|
|
|
|
|
if r.Right.Op == OARRAYBYTESTR {
|
|
|
|
|
r.Right.Op = OARRAYBYTESTRTMP
|
|
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
r.Right = o.mapKeyTemp(r.Left.Type, r.Right)
|
|
|
|
|
o.okAs2(n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// Special: avoid copy of func call n.Rlist.First().
|
2015-02-13 14:40:36 -05:00
|
|
|
case OAS2FUNC:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
o.exprList(n.List)
|
|
|
|
|
o.call(n.Rlist.First())
|
|
|
|
|
o.as2(n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: use temporary variables to hold result,
|
2015-02-13 14:40:36 -05:00
|
|
|
// so that assertI2Tetc can take address of temporary.
|
|
|
|
|
// No temporary for blank assignment.
|
|
|
|
|
case OAS2DOTTYPE:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
o.exprList(n.List)
|
|
|
|
|
n.Rlist.First().Left = o.expr(n.Rlist.First().Left, nil) // i in i.(T)
|
|
|
|
|
o.okAs2(n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: use temporary variables to hold result,
|
2015-02-13 14:40:36 -05:00
|
|
|
// so that chanrecv can take address of temporary.
|
|
|
|
|
case OAS2RECV:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
o.exprList(n.List)
|
|
|
|
|
n.Rlist.First().Left = o.expr(n.Rlist.First().Left, nil) // arg to recv
|
2016-03-08 15:10:26 -08:00
|
|
|
ch := n.Rlist.First().Left.Type
|
2018-03-04 17:17:55 -08:00
|
|
|
tmp1 := o.newTemp(ch.Elem(), types.Haspointers(ch.Elem()))
|
|
|
|
|
tmp2 := o.newTemp(types.Types[TBOOL], false)
|
|
|
|
|
o.out = append(o.out, n)
|
2016-09-16 11:00:54 +10:00
|
|
|
r := nod(OAS, n.List.First(), tmp1)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = typecheck(r, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.mapAssign(r)
|
2016-08-26 10:50:12 -07:00
|
|
|
r = okas(n.List.Second(), tmp2)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = typecheck(r, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.mapAssign(r)
|
2017-02-19 15:57:58 +01:00
|
|
|
n.List.Set2(tmp1, tmp2)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: does not save n onto out.
|
2015-04-01 09:38:44 -07:00
|
|
|
case OBLOCK, OEMPTY:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.stmtList(n.List)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: n.Left is not an expression; save as is.
|
2015-02-13 14:40:36 -05:00
|
|
|
case OBREAK,
|
|
|
|
|
OCONTINUE,
|
|
|
|
|
ODCL,
|
|
|
|
|
ODCLCONST,
|
|
|
|
|
ODCLTYPE,
|
|
|
|
|
OFALL,
|
|
|
|
|
OGOTO,
|
|
|
|
|
OLABEL,
|
|
|
|
|
ORETJMP:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: handle call arguments.
|
2015-04-01 09:38:44 -07:00
|
|
|
case OCALLFUNC, OCALLINTER, OCALLMETH:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
o.call(n)
|
|
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: order arguments to inner call but not call itself.
|
2015-04-01 09:38:44 -07:00
|
|
|
case ODEFER, OPROC:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few issues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
o.call(n.Left)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ODELETE:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.List.SetFirst(o.expr(n.List.First(), nil))
|
|
|
|
|
n.List.SetSecond(o.expr(n.List.Second(), nil))
|
|
|
|
|
n.List.SetSecond(o.mapKeyTemp(n.List.First().Type, n.List.Second()))
|
|
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Clean temporaries from condition evaluation at
|
2015-02-13 14:40:36 -05:00
|
|
|
// beginning of loop body and after for statement.
|
|
|
|
|
case OFOR:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.exprInPlace(n.Left)
|
2018-03-22 12:49:48 -07:00
|
|
|
n.Nbody.Prepend(o.cleanTempNoPop(t)...)
|
2018-03-04 17:17:55 -08:00
|
|
|
orderBlock(&n.Nbody)
|
|
|
|
|
n.Right = orderStmtInPlace(n.Right)
|
|
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Clean temporaries from condition at
|
2015-02-13 14:40:36 -05:00
|
|
|
// beginning of both branches.
|
|
|
|
|
case OIF:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.exprInPlace(n.Left)
|
2018-03-22 12:49:48 -07:00
|
|
|
n.Nbody.Prepend(o.cleanTempNoPop(t)...)
|
|
|
|
|
n.Rlist.Prepend(o.cleanTempNoPop(t)...)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.popTemp(t)
|
|
|
|
|
orderBlock(&n.Nbody)
|
|
|
|
|
orderBlock(&n.Rlist)
|
|
|
|
|
o.out = append(o.out, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: argument will be converted to interface using convT2E
|
2015-02-13 14:40:36 -05:00
|
|
|
// so make sure it is an addressable temporary.
|
|
|
|
|
case OPANIC:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
2016-03-30 14:45:47 -07:00
|
|
|
if !n.Left.Type.IsInterface() {
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.addrTemp(n.Left)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ORANGE:
|
2016-03-25 21:11:33 -07:00
|
|
|
// n.Right is the expression being ranged over.
|
|
|
|
|
// order it, and then make a copy if we need one.
|
|
|
|
|
// We almost always do, to ensure that we don't
|
|
|
|
|
// see any value changes made during the loop.
|
|
|
|
|
// Usually the copy is cheap (e.g., array pointer,
|
|
|
|
|
// chan, slice, string are all tiny).
|
|
|
|
|
// The exception is ranging over an array value
|
|
|
|
|
// (not a slice, not a pointer to array),
|
|
|
|
|
// which must make a copy to avoid seeing updates made during
|
|
|
|
|
// the range body. Ranging over an array value is uncommon though.
|
|
|
|
|
|
|
|
|
|
// Mark []byte(str) range expression to reuse string backing storage.
|
|
|
|
|
// It is safe because the storage cannot be mutated.
|
|
|
|
|
if n.Right.Op == OSTRARRAYBYTE {
|
|
|
|
|
n.Right.Op = OSTRARRAYBYTETMP
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
2018-04-27 21:58:59 +02:00
|
|
|
|
|
|
|
|
orderBody := true
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Type.Etype {
|
|
|
|
|
default:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("orderstmt range %v", n.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-04-18 14:02:08 -07:00
|
|
|
case TARRAY, TSLICE:
|
2018-04-08 13:39:10 +01:00
|
|
|
if n.List.Len() < 2 || n.List.Second().isBlank() {
|
2015-02-13 14:40:36 -05:00
|
|
|
// for i := range x will only use x once, to compute len(x).
|
|
|
|
|
// No need to copy it.
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case TCHAN, TSTRING:
|
2016-03-25 21:11:33 -07:00
|
|
|
// chan, string, slice, array ranges use value multiple times.
|
|
|
|
|
// make copy.
|
2015-02-23 16:07:24 -05:00
|
|
|
r := n.Right
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
if r.Type.IsString() && r.Type != types.Types[TSTRING] {
|
2016-09-16 11:00:54 +10:00
|
|
|
r = nod(OCONV, r, nil)
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
r.Type = types.Types[TSTRING]
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = typecheck(r, Erv)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Right = o.copyExpr(r, r.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case TMAP:
|
2018-04-27 21:58:59 +02:00
|
|
|
if isMapClear(n) {
|
|
|
|
|
// Preserve the body of the map clear pattern so it can
|
|
|
|
|
// be detected during walk. The loop body will not be used
|
|
|
|
|
// when optimizing away the range loop to a runtime call.
|
|
|
|
|
orderBody = false
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-25 21:11:33 -07:00
|
|
|
// copy the map value in case it is a map literal.
|
|
|
|
|
// TODO(rsc): Make tmp = literal expressions reuse tmp.
|
|
|
|
|
// For maps tmp is just one word so it hardly matters.
|
2015-02-23 16:07:24 -05:00
|
|
|
r := n.Right
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Right = o.copyExpr(r, r.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-09-02 14:31:13 +02:00
|
|
|
// prealloc[n] is the temp for the iterator.
|
|
|
|
|
// hiter contains pointers and needs to be zeroed.
|
2018-03-04 17:17:55 -08:00
|
|
|
prealloc[n] = o.newTemp(hiter(n.Type), true)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.exprListInPlace(n.List)
|
2018-04-27 21:58:59 +02:00
|
|
|
if orderBody {
|
|
|
|
|
orderBlock(&n.Nbody)
|
|
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ORETURN:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.callArgs(&n.List)
|
|
|
|
|
o.out = append(o.out, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// Special: clean case temporaries in each block entry.
|
2015-02-13 14:40:36 -05:00
|
|
|
// Select must enter one of its blocks, so there is no
|
|
|
|
|
// need for a cleaning at the end.
|
|
|
|
|
// Doubly special: evaluation order for select is stricter
|
|
|
|
|
// than ordinary expressions. Even something like p.c
|
|
|
|
|
// has to be hoisted into a temporary, so that it cannot be
|
|
|
|
|
// reordered after the channel evaluation for a different
|
|
|
|
|
// case (if p were nil, then the timing of the fault would
|
|
|
|
|
// give this away).
|
|
|
|
|
case OSELECT:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n2 := range n.List.Slice() {
|
|
|
|
|
if n2.Op != OXCASE {
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("order select case %v", n2.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-10-11 10:14:31 +01:00
|
|
|
r := n2.Left
|
2016-03-08 15:10:26 -08:00
|
|
|
setlineno(n2)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Append any new body prologue to ninit.
|
|
|
|
|
// The next loop will insert ninit into nbody.
|
2016-03-08 15:10:26 -08:00
|
|
|
if n2.Ninit.Len() != 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("order select ninit")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-04-03 13:17:28 +01:00
|
|
|
if r == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
switch r.Op {
|
|
|
|
|
default:
|
|
|
|
|
Dump("select case", r)
|
|
|
|
|
Fatalf("unknown op in select %v", r.Op)
|
|
|
|
|
|
|
|
|
|
// If this is case x := <-ch or case x, y := <-ch, the case has
|
|
|
|
|
// the ODCL nodes to declare x and y. We want to delay that
|
|
|
|
|
// declaration (and possible allocation) until inside the case body.
|
|
|
|
|
// Delete the ODCL nodes here and recreate them inside the body below.
|
|
|
|
|
case OSELRECV, OSELRECV2:
|
|
|
|
|
if r.Colas() {
|
|
|
|
|
i := 0
|
|
|
|
|
if r.Ninit.Len() != 0 && r.Ninit.First().Op == ODCL && r.Ninit.First().Left == r.Left {
|
|
|
|
|
i++
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-04-03 13:17:28 +01:00
|
|
|
if i < r.Ninit.Len() && r.Ninit.Index(i).Op == ODCL && r.List.Len() != 0 && r.Ninit.Index(i).Left == r.List.First() {
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
if i >= r.Ninit.Len() {
|
|
|
|
|
r.Ninit.Set(nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-04-03 13:17:28 +01:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
if r.Ninit.Len() != 0 {
|
|
|
|
|
dumplist("ninit", r.Ninit)
|
|
|
|
|
Fatalf("ninit on select recv")
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
// case x = <-c
|
|
|
|
|
// case x, ok = <-c
|
|
|
|
|
// r.Left is x, r.List.First() is ok (when present), r.Right is ORECV, r.Right.Left is c.
|
|
|
|
|
// r.Left == nil means 'case <-c'.
|
|
|
|
|
// c is always evaluated; x and ok are only evaluated when assigned.
|
|
|
|
|
r.Right.Left = o.expr(r.Right.Left, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
if r.Right.Left.Op != ONAME {
|
|
|
|
|
r.Right.Left = o.copyExpr(r.Right.Left, r.Right.Left.Type, false)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Introduce temporary for receive and move actual copy into case body.
|
|
|
|
|
// avoids problems with target being addressed, as usual.
|
|
|
|
|
// NOTE: If we wanted to be clever, we could arrange for just one
|
|
|
|
|
// temporary per distinct type, sharing the temp among all receives
|
|
|
|
|
// with that temp. Similarly one ok bool could be shared among all
|
|
|
|
|
// the x,ok receives. Not worth doing until there's a clear need.
|
2018-04-08 13:39:10 +01:00
|
|
|
if r.Left != nil && r.Left.isBlank() {
|
2018-04-03 13:17:28 +01:00
|
|
|
r.Left = nil
|
|
|
|
|
}
|
|
|
|
|
if r.Left != nil {
|
|
|
|
|
// use channel element type for temporary to avoid conversions,
|
|
|
|
|
// such as in case interfacevalue = <-intchan.
|
|
|
|
|
// the conversion happens in the OAS instead.
|
|
|
|
|
tmp1 := r.Left
|
|
|
|
|
|
|
|
|
|
if r.Colas() {
|
|
|
|
|
tmp2 := nod(ODCL, tmp1, nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
tmp2 = typecheck(tmp2, Etop)
|
2016-03-08 15:10:26 -08:00
|
|
|
n2.Ninit.Append(tmp2)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
r.Left = o.newTemp(r.Right.Left.Type.Elem(), types.Haspointers(r.Right.Left.Type.Elem()))
|
|
|
|
|
tmp2 := nod(OAS, tmp1, r.Left)
|
|
|
|
|
tmp2 = typecheck(tmp2, Etop)
|
|
|
|
|
n2.Ninit.Append(tmp2)
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-08 13:39:10 +01:00
|
|
|
if r.List.Len() != 0 && r.List.First().isBlank() {
|
2018-04-03 13:17:28 +01:00
|
|
|
r.List.Set(nil)
|
|
|
|
|
}
|
|
|
|
|
if r.List.Len() != 0 {
|
|
|
|
|
tmp1 := r.List.First()
|
|
|
|
|
if r.Colas() {
|
|
|
|
|
tmp2 := nod(ODCL, tmp1, nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
tmp2 = typecheck(tmp2, Etop)
|
2016-03-08 15:10:26 -08:00
|
|
|
n2.Ninit.Append(tmp2)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
r.List.Set1(o.newTemp(types.Types[TBOOL], false))
|
|
|
|
|
tmp2 := okas(tmp1, r.List.First())
|
|
|
|
|
tmp2 = typecheck(tmp2, Etop)
|
|
|
|
|
n2.Ninit.Append(tmp2)
|
|
|
|
|
}
|
|
|
|
|
orderBlock(&n2.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
case OSEND:
|
|
|
|
|
if r.Ninit.Len() != 0 {
|
|
|
|
|
dumplist("ninit", r.Ninit)
|
|
|
|
|
Fatalf("ninit on select send")
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-04-03 13:17:28 +01:00
|
|
|
// case c <- x
|
|
|
|
|
// r.Left is c, r.Right is x, both are always evaluated.
|
|
|
|
|
r.Left = o.expr(r.Left, nil)
|
|
|
|
|
|
|
|
|
|
if !r.Left.IsAutoTmp() {
|
|
|
|
|
r.Left = o.copyExpr(r.Left, r.Left.Type, false)
|
|
|
|
|
}
|
|
|
|
|
r.Right = o.expr(r.Right, nil)
|
|
|
|
|
if !r.Right.IsAutoTmp() {
|
|
|
|
|
r.Right = o.copyExpr(r.Right, r.Right.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Now that we have accumulated all the temporaries, clean them.
|
|
|
|
|
// Also insert any ninit queued during the previous loop.
|
|
|
|
|
// (The temporary cleaning must follow that ninit work.)
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n3 := range n.List.Slice() {
|
2018-04-03 13:17:28 +01:00
|
|
|
orderBlock(&n3.Nbody)
|
2018-03-22 12:49:48 -07:00
|
|
|
n3.Nbody.Prepend(o.cleanTempNoPop(t)...)
|
|
|
|
|
|
|
|
|
|
// TODO(mdempsky): Is this actually necessary?
|
|
|
|
|
// walkselect appears to walk Ninit.
|
|
|
|
|
n3.Nbody.Prepend(n3.Ninit.Slice()...)
|
2016-03-08 15:10:26 -08:00
|
|
|
n3.Ninit.Set(nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.popTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Special: value being sent is passed as a pointer; make it addressable.
|
2015-02-13 14:40:36 -05:00
|
|
|
case OSEND:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
2017-03-18 15:55:41 +00:00
|
|
|
if instrumenting {
|
|
|
|
|
// Force copying to the stack so that (chan T)(nil) <- x
|
|
|
|
|
// is still instrumented as a read of x.
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Right = o.copyExpr(n.Right, n.Right.Type, false)
|
2017-03-18 15:55:41 +00:00
|
|
|
} else {
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Right = o.addrTemp(n.Right)
|
2017-03-18 15:55:41 +00:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// TODO(rsc): Clean temporaries more aggressively.
|
2015-02-13 14:40:36 -05:00
|
|
|
// Note that because walkswitch will rewrite some of the
|
|
|
|
|
// switch into a binary search, this is not as easy as it looks.
|
|
|
|
|
// (If we ran that code here we could invoke orderstmt on
|
|
|
|
|
// the if-else chain instead.)
|
|
|
|
|
// For now just clean all the temporaries at the end.
|
|
|
|
|
// In practice that's fine.
|
|
|
|
|
case OSWITCH:
|
2018-03-04 17:17:55 -08:00
|
|
|
t := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
for _, ncas := range n.List.Slice() {
|
|
|
|
|
if ncas.Op != OXCASE {
|
|
|
|
|
Fatalf("order switch case %v", ncas.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-03-04 17:17:55 -08:00
|
|
|
o.exprListInPlace(ncas.List)
|
|
|
|
|
orderBlock(&ncas.Nbody)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
|
|
|
|
o.cleanTemp(t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-02 17:34:42 -08:00
|
|
|
lineno = lno
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// exprList orders the expression list l into o.
|
|
|
|
|
func (o *Order) exprList(l Nodes) {
|
2016-03-09 12:39:36 -08:00
|
|
|
s := l.Slice()
|
|
|
|
|
for i := range s {
|
2018-03-04 17:17:55 -08:00
|
|
|
s[i] = o.expr(s[i], nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// exprListInPlace orders the expression list l but saves
|
2015-02-13 14:40:36 -05:00
|
|
|
// the side effects on the individual expression ninit lists.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) exprListInPlace(l Nodes) {
|
2016-03-09 12:39:36 -08:00
|
|
|
s := l.Slice()
|
|
|
|
|
for i := range s {
|
2018-03-04 17:17:55 -08:00
|
|
|
s[i] = o.exprInPlace(s[i])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-05-26 23:05:35 -04:00
|
|
|
// prealloc[x] records the allocation to use for x.
|
|
|
|
|
var prealloc = map[*Node]*Node{}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// expr orders a single expression, appending side
|
|
|
|
|
// effects to o.out as needed.
|
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x
This slows down more things than I expected, but it also speeds things up,
and it reduces stack frame sizes and the load on the optimizer, so it's still
likely a net win.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795)
BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000)
BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000)
BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000)
BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000)
BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013)
BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039)
BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104)
BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035)
BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131)
BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572)
BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000)
BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000)
BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924)
BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242)
BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000)
BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000)
BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000)
BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000)
BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000)
BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791)
BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110)
BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433)
BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303)
BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001)
BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000)
Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f
Reviewed-on: https://go-review.googlesource.com/9721
Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
|
|
|
// If this is part of an assignment lhs = *np, lhs is given.
|
|
|
|
|
// Otherwise lhs == nil. (When lhs != nil it may be possible
|
|
|
|
|
// to avoid copying the result of the expression to a temporary.)
|
2018-03-04 17:17:55 -08:00
|
|
|
// The result of expr MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = o.expr(n.Left, lhs)
|
|
|
|
|
func (o *Order) expr(n, lhs *Node) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n == nil {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-02 17:34:42 -08:00
|
|
|
lno := setlineno(n)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.init(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
|
|
|
|
o.exprList(n.List)
|
|
|
|
|
o.exprList(n.Rlist)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// Addition of strings turns into a function call.
|
2015-02-13 14:40:36 -05:00
|
|
|
// Allocate a temporary to hold the strings.
|
|
|
|
|
// Five or fewer strings use direct runtime helpers.
|
|
|
|
|
case OADDSTR:
|
2018-03-04 17:17:55 -08:00
|
|
|
o.exprList(n.List)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
if n.List.Len() > 5 {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewArray(types.Types[TSTRING], int64(n.List.Len()))
|
2018-03-04 17:17:55 -08:00
|
|
|
prealloc[n] = o.newTemp(t, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Mark string(byteSlice) arguments to reuse byteSlice backing
|
|
|
|
|
// buffer during conversion. String concatenation does not
|
|
|
|
|
// memorize the strings for later use, so it is safe.
|
|
|
|
|
// However, we can do it only if there is at least one non-empty string literal.
|
|
|
|
|
// Otherwise if all other arguments are empty strings,
|
|
|
|
|
// concatstrings will return the reference to the temp string
|
|
|
|
|
// to the caller.
|
2015-02-23 16:07:24 -05:00
|
|
|
hasbyte := false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
haslit := false
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n1 := range n.List.Slice() {
|
|
|
|
|
hasbyte = hasbyte || n1.Op == OARRAYBYTESTR
|
|
|
|
|
haslit = haslit || n1.Op == OLITERAL && len(n1.Val().U.(string)) != 0
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if haslit && hasbyte {
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n2 := range n.List.Slice() {
|
|
|
|
|
if n2.Op == OARRAYBYTESTR {
|
|
|
|
|
n2.Op = OARRAYBYTESTRTMP
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OCMPSTR:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Mark string(byteSlice) arguments to reuse byteSlice backing
|
|
|
|
|
// buffer during conversion. String comparison does not
|
|
|
|
|
// memorize the strings for later use, so it is safe.
|
|
|
|
|
if n.Left.Op == OARRAYBYTESTR {
|
|
|
|
|
n.Left.Op = OARRAYBYTESTRTMP
|
|
|
|
|
}
|
|
|
|
|
if n.Right.Op == OARRAYBYTESTR {
|
|
|
|
|
n.Right.Op = OARRAYBYTESTRTMP
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// key must be addressable
|
|
|
|
|
case OINDEXMAP:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
2016-10-11 08:36:38 -07:00
|
|
|
needCopy := false
|
|
|
|
|
|
2018-03-08 04:18:18 -08:00
|
|
|
if !n.IndexMapLValue() && instrumenting {
|
2016-10-11 08:36:38 -07:00
|
|
|
// Race detector needs the copy so it can
|
|
|
|
|
// call treecopy on the result.
|
|
|
|
|
needCopy = true
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// For x = m[string(k)] where k is []byte, the allocation of
|
|
|
|
|
// backing bytes for the string can be avoided by reusing
|
|
|
|
|
// the []byte backing array. This is a special case that it
|
|
|
|
|
// would be nice to handle more generally, but because
|
|
|
|
|
// there are no []byte-keyed maps, this specific case comes
|
|
|
|
|
// up in important cases in practice. See issue 3512.
|
|
|
|
|
// Nothing can change the []byte we are not copying before
|
|
|
|
|
// the map index, because the map access is going to
|
|
|
|
|
// be forced to happen immediately following this
|
|
|
|
|
// conversion (by the o.copyExpr call a few lines below).
|
2018-03-08 04:18:18 -08:00
|
|
|
if !n.IndexMapLValue() && n.Right.Op == OARRAYBYTESTR {
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Right.Op = OARRAYBYTESTRTMP
|
2016-10-11 08:36:38 -07:00
|
|
|
needCopy = true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Right = o.mapKeyTemp(n.Left.Type, n.Right)
|
2016-10-11 08:36:38 -07:00
|
|
|
if needCopy {
|
2018-03-04 17:17:55 -08:00
|
|
|
n = o.copyExpr(n, n.Type, false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// concrete type (not interface) argument must be addressable
|
2015-02-13 14:40:36 -05:00
|
|
|
// temporary to pass to runtime.
|
|
|
|
|
case OCONVIFACE:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-30 14:45:47 -07:00
|
|
|
if !n.Left.Type.IsInterface() {
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.addrTemp(n.Left)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-04-19 15:02:06 -07:00
|
|
|
case OCONVNOP:
|
|
|
|
|
if n.Type.IsKind(TUNSAFEPTR) && n.Left.Type.IsKind(TUINTPTR) && (n.Left.Op == OCALLFUNC || n.Left.Op == OCALLINTER || n.Left.Op == OCALLMETH) {
|
|
|
|
|
// When reordering unsafe.Pointer(f()) into a separate
|
|
|
|
|
// statement, the conversion and function call must stay
|
|
|
|
|
// together. See golang.org/issue/15329.
|
2018-03-04 17:17:55 -08:00
|
|
|
o.init(n.Left)
|
|
|
|
|
o.call(n.Left)
|
2016-04-19 15:02:06 -07:00
|
|
|
if lhs == nil || lhs.Op != ONAME || instrumenting {
|
2018-03-04 17:17:55 -08:00
|
|
|
n = o.copyExpr(n, n.Type, false)
|
2016-04-19 15:02:06 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
2016-04-19 15:02:06 -07:00
|
|
|
}
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OANDAND, OOROR:
|
2018-03-04 17:17:55 -08:00
|
|
|
mark := o.markTemp()
|
|
|
|
|
n.Left = o.expr(n.Left, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Clean temporaries from first branch at beginning of second.
|
|
|
|
|
// Leave them on the stack so that they can be killed in the outer
|
|
|
|
|
// context in case the short circuit is taken.
|
2018-03-22 12:49:48 -07:00
|
|
|
n.Right = addinit(n.Right, o.cleanTempNoPop(mark))
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Right = o.exprInPlace(n.Right)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
case OCALLFUNC,
|
2015-02-13 14:40:36 -05:00
|
|
|
OCALLINTER,
|
|
|
|
|
OCALLMETH,
|
|
|
|
|
OCAP,
|
|
|
|
|
OCOMPLEX,
|
|
|
|
|
OCOPY,
|
|
|
|
|
OIMAG,
|
|
|
|
|
OLEN,
|
|
|
|
|
OMAKECHAN,
|
|
|
|
|
OMAKEMAP,
|
|
|
|
|
OMAKESLICE,
|
|
|
|
|
ONEW,
|
|
|
|
|
OREAL,
|
2015-11-18 14:37:12 -05:00
|
|
|
ORECOVER,
|
|
|
|
|
OSTRARRAYBYTE,
|
|
|
|
|
OSTRARRAYBYTETMP,
|
|
|
|
|
OSTRARRAYRUNE:
|
2018-04-24 15:13:08 +02:00
|
|
|
|
|
|
|
|
if isRuneCount(n) {
|
|
|
|
|
// len([]rune(s)) is rewritten to runtime.countrunes(s) later.
|
|
|
|
|
n.Left.Left = o.expr(n.Left.Left, nil)
|
|
|
|
|
} else {
|
|
|
|
|
o.call(n)
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-20 10:00:07 -07:00
|
|
|
if lhs == nil || lhs.Op != ONAME || instrumenting {
|
2018-03-04 17:17:55 -08:00
|
|
|
n = o.copyExpr(n, n.Type, false)
|
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x
This slows down more things than I expected, but it also speeds things up,
and it reduces stack frame sizes and the load on the optimizer, so it's still
likely a net win.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795)
BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000)
BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000)
BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000)
BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000)
BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013)
BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039)
BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104)
BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035)
BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131)
BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572)
BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000)
BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000)
BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924)
BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242)
BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000)
BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000)
BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000)
BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000)
BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000)
BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791)
BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110)
BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433)
BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303)
BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001)
BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000)
Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f
Reviewed-on: https://go-review.googlesource.com/9721
Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
case OAPPEND:
|
2018-04-26 18:30:11 +02:00
|
|
|
// Check for append(x, make([]T, y)...) .
|
|
|
|
|
if isAppendOfMake(n) {
|
|
|
|
|
n.List.SetFirst(o.expr(n.List.First(), nil)) // order x
|
|
|
|
|
n.List.Second().Left = o.expr(n.List.Second().Left, nil) // order y
|
|
|
|
|
} else {
|
|
|
|
|
o.callArgs(&n.List)
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
if lhs == nil || lhs.Op != ONAME && !samesafeexpr(lhs, n.List.First()) {
|
2018-03-04 17:17:55 -08:00
|
|
|
n = o.copyExpr(n, n.Type, false)
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
}
|
|
|
|
|
|
2016-04-21 11:55:33 -07:00
|
|
|
case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
2016-04-21 11:55:33 -07:00
|
|
|
low, high, max := n.SliceBounds()
|
2018-03-04 17:17:55 -08:00
|
|
|
low = o.expr(low, nil)
|
|
|
|
|
low = o.cheapExpr(low)
|
|
|
|
|
high = o.expr(high, nil)
|
|
|
|
|
high = o.cheapExpr(high)
|
|
|
|
|
max = o.expr(max, nil)
|
|
|
|
|
max = o.cheapExpr(max)
|
2016-04-21 11:55:33 -07:00
|
|
|
n.SetSliceBounds(low, high, max)
|
2015-05-27 15:20:49 -04:00
|
|
|
if lhs == nil || lhs.Op != ONAME && !samesafeexpr(lhs, n.Left) {
|
2018-03-04 17:17:55 -08:00
|
|
|
n = o.copyExpr(n, n.Type, false)
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
case OCLOSURE:
|
cmd/compile: cleanup closure.go
The main thing is we now eagerly create the ODCLFUNC node for
closures, immediately cross-link them, and assign fields (e.g., Nbody,
Dcl, Parents, Marks) directly on the ODCLFUNC (previously they were
assigned on the OCLOSURE and later moved to the ODCLFUNC).
This allows us to set Curfn to the ODCLFUNC instead of the OCLOSURE,
which makes things more consistent with normal function declarations.
(Notably, this means Cvars now hang off the ODCLFUNC instead of the
OCLOSURE.)
Assignment of xfunc symbol names also now happens before typechecking
their body, which means debugging output now provides a more helpful
name than "<S>".
In golang.org/cl/66810, we changed "x := y" statements to avoid
creating false closure variables for x, but we still create them for
struct literals like "s{f: x}". Update comment in capturevars
accordingly.
More opportunity for cleanups still, but this makes some substantial
progress, IMO.
Passes toolstash-check.
Change-Id: I65a4efc91886e3dcd1000561348af88297775cd7
Reviewed-on: https://go-review.googlesource.com/100197
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2018-03-08 06:25:04 -08:00
|
|
|
if n.Noescape() && n.Func.Closure.Func.Cvars.Len() > 0 {
|
2018-03-04 17:17:55 -08:00
|
|
|
prealloc[n] = o.newTemp(types.Types[TUINT8], false) // walk will fill in correct type
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-06-19 07:20:28 -07:00
|
|
|
case OARRAYLIT, OSLICELIT, OCALLPART:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
|
|
|
|
o.exprList(n.List)
|
|
|
|
|
o.exprList(n.Rlist)
|
2017-02-27 19:56:38 +02:00
|
|
|
if n.Noescape() {
|
2018-03-04 17:17:55 -08:00
|
|
|
prealloc[n] = o.newTemp(types.Types[TUINT8], false) // walk will fill in correct type
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ODDDARG:
|
2017-02-27 19:56:38 +02:00
|
|
|
if n.Noescape() {
|
2015-02-13 14:40:36 -05:00
|
|
|
// The ddd argument does not live beyond the call it is created for.
|
|
|
|
|
// Allocate a temporary that will be cleaned up when this statement
|
|
|
|
|
// completes. We could be more aggressive and try to arrange for it
|
|
|
|
|
// to be cleaned up when the call completes.
|
2018-03-04 17:17:55 -08:00
|
|
|
prealloc[n] = o.newTemp(n.Type.Elem(), false)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-03-20 00:06:10 -04:00
|
|
|
case ODOTTYPE, ODOTTYPE2:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
2016-09-15 14:34:20 +10:00
|
|
|
// TODO(rsc): The isfat is for consistency with componentgen and walkexpr.
|
2015-03-20 00:06:10 -04:00
|
|
|
// It needs to be removed in all three places.
|
|
|
|
|
// That would allow inlining x.(struct{*int}) the same as x.(*int).
|
2016-09-15 14:34:20 +10:00
|
|
|
if !isdirectiface(n.Type) || isfat(n.Type) || instrumenting {
|
2018-03-04 17:17:55 -08:00
|
|
|
n = o.copyExpr(n, n.Type, true)
|
2015-03-20 00:06:10 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ORECV:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n = o.copyExpr(n, n.Type, true)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OEQ, ONE:
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.expr(n.Left, nil)
|
|
|
|
|
n.Right = o.expr(n.Right, nil)
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Left.Type
|
2016-03-30 14:56:08 -07:00
|
|
|
if t.IsStruct() || t.IsArray() {
|
2015-02-13 14:40:36 -05:00
|
|
|
// for complex comparisons, we need both args to be
|
|
|
|
|
// addressable so we can pass them to the runtime.
|
2018-03-04 17:17:55 -08:00
|
|
|
n.Left = o.addrTemp(n.Left)
|
|
|
|
|
n.Right = o.addrTemp(n.Right)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-02 17:34:42 -08:00
|
|
|
lineno = lno
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-08-26 10:50:12 -07:00
|
|
|
|
|
|
|
|
// okas creates and returns an assignment of val to ok,
|
|
|
|
|
// including an explicit conversion if necessary.
|
|
|
|
|
func okas(ok, val *Node) *Node {
|
2018-04-08 13:39:10 +01:00
|
|
|
if !ok.isBlank() {
|
2016-08-26 10:50:12 -07:00
|
|
|
val = conv(val, ok.Type)
|
|
|
|
|
}
|
2016-09-16 11:00:54 +10:00
|
|
|
return nod(OAS, ok, val)
|
2016-08-26 10:50:12 -07:00
|
|
|
}
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// as2 orders OAS2XXXX nodes. It creates temporaries to ensure left-to-right assignment.
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
// The caller should order the right-hand side of the assignment before calling orderas2.
|
|
|
|
|
// It rewrites,
|
|
|
|
|
// a, b, a = ...
|
|
|
|
|
// as
|
|
|
|
|
// tmp1, tmp2, tmp3 = ...
|
|
|
|
|
// a, b, a = tmp1, tmp2, tmp3
|
|
|
|
|
// This is necessary to ensure left to right assignment order.
|
2018-03-04 17:17:55 -08:00
|
|
|
func (o *Order) as2(n *Node) {
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
tmplist := []*Node{}
|
|
|
|
|
left := []*Node{}
|
|
|
|
|
for _, l := range n.List.Slice() {
|
2018-04-08 13:39:10 +01:00
|
|
|
if !l.isBlank() {
|
2018-03-04 17:17:55 -08:00
|
|
|
tmp := o.newTemp(l.Type, types.Haspointers(l.Type))
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
tmplist = append(tmplist, tmp)
|
|
|
|
|
left = append(left, l)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
|
|
|
|
|
as := nod(OAS2, nil, nil)
|
|
|
|
|
as.List.Set(left)
|
|
|
|
|
as.Rlist.Set(tmplist)
|
|
|
|
|
as = typecheck(as, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.stmt(as)
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
|
|
|
|
|
ti := 0
|
|
|
|
|
for ni, l := range n.List.Slice() {
|
2018-04-08 13:39:10 +01:00
|
|
|
if !l.isBlank() {
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
n.List.SetIndex(ni, tmplist[ti])
|
|
|
|
|
ti++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
// okAs2 orders OAS2 with ok.
|
|
|
|
|
// Just like as2, this also adds temporaries to ensure left-to-right assignment.
|
|
|
|
|
func (o *Order) okAs2(n *Node) {
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
var tmp1, tmp2 *Node
|
2018-04-08 13:39:10 +01:00
|
|
|
if !n.List.First().isBlank() {
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
typ := n.Rlist.First().Type
|
2018-03-04 17:17:55 -08:00
|
|
|
tmp1 = o.newTemp(typ, types.Haspointers(typ))
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
}
|
|
|
|
|
|
2018-04-08 13:39:10 +01:00
|
|
|
if !n.List.Second().isBlank() {
|
2018-03-04 17:17:55 -08:00
|
|
|
tmp2 = o.newTemp(types.Types[TBOOL], false)
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
}
|
|
|
|
|
|
2018-03-04 17:17:55 -08:00
|
|
|
o.out = append(o.out, n)
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
|
|
|
|
|
if tmp1 != nil {
|
|
|
|
|
r := nod(OAS, n.List.First(), tmp1)
|
|
|
|
|
r = typecheck(r, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.mapAssign(r)
|
2017-02-19 15:57:58 +01:00
|
|
|
n.List.SetFirst(tmp1)
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
}
|
|
|
|
|
if tmp2 != nil {
|
|
|
|
|
r := okas(n.List.Second(), tmp2)
|
|
|
|
|
r = typecheck(r, Etop)
|
2018-03-04 17:17:55 -08:00
|
|
|
o.mapAssign(r)
|
2017-02-19 15:57:58 +01:00
|
|
|
n.List.SetSecond(tmp2)
|
cmd/compile: Ensure left-to-right assignment
Add temporaries to reorder the assignment for OAS2XXX nodes.
This makes orderstmt(), rewrite
a, b, c = ...
as
tmp1, tmp2, tmp3 = ...
a, b, c = tmp1, tmp2, tmp3
and
a, ok = ...
as
t1, t2 = ...
a = t1
ok = t2
Fixes #13433.
Change-Id: Id0f5956e3a254d0a6f4b89b5f7b0e055b1f0e21f
Reviewed-on: https://go-review.googlesource.com/34713
Run-TryBot: Dhananjay Nakrani <dhananjayn@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2016-12-23 22:28:45 -08:00
|
|
|
}
|
|
|
|
|
}
|