go/src/cmd/compile/internal/gc/order.go

1277 lines
34 KiB
Go
Raw Normal View History

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import (
"cmd/compile/internal/types"
"cmd/internal/src"
"fmt"
)
// Rewrite tree to use separate statements to enforce
// order of evaluation. Makes walk easier, because it
// can (after this runs) reorder at will within an expression.
//
// Rewrite m[k] op= r into m[k] = m[k] op r if op is / or %.
//
// Introduce temporaries as needed by runtime routines.
// For example, the map runtime routines take the map key
// by reference, so make sure all map keys are addressable
// by copying them to temporaries as needed.
// The same is true for channel operations.
//
// Arrange that map index expressions only appear in direct
// assignments x = m[k] or m[k] = x, never in larger expressions.
//
// Arrange that receive expressions only appear in direct assignments
// x = <-c or as standalone statements <-c, never in larger expressions.
// TODO(rsc): The temporary introduction during multiple assignments
// should be moved into this file, so that the temporaries can be cleaned
// and so that conversions implicit in the OAS2FUNC and OAS2RECV
// nodes can be made explicit and then have their temporaries cleaned.
// TODO(rsc): Goto and multilevel break/continue can jump over
// inserted VARKILL annotations. Work out a way to handle these.
// The current implementation is safe, in that it will execute correctly.
// But it won't reuse temporaries as aggressively as it might, and
// it can result in unnecessary zeroing of those variables in the function
// prologue.
// Order holds state during the ordering process.
type Order struct {
out []*Node // list of generated statements
temp []*Node // stack of temporary variables
}
// Order rewrites fn.Nbody to apply the ordering constraints
// described in the comment at the top of the file.
func order(fn *Node) {
if Debug['W'] > 1 {
s := fmt.Sprintf("\nbefore order %v", fn.Func.Nname.Sym)
dumplist(s, fn.Nbody)
}
orderBlock(&fn.Nbody)
}
// newTemp allocates a new temporary with the given type,
// pushes it onto the temp stack, and returns it.
// If clear is true, newTemp emits code to zero the temporary.
func (o *Order) newTemp(t *types.Type, clear bool) *Node {
v := temp(t)
if clear {
a := nod(OAS, v, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
o.out = append(o.out, a)
}
o.temp = append(o.temp, v)
return v
}
// copyExpr behaves like ordertemp but also emits
// code to initialize the temporary to the value n.
//
// The clear argument is provided for use when the evaluation
// of tmp = n turns into a function call that is passed a pointer
// to the temporary as the output space. If the call blocks before
// tmp has been written, the garbage collector will still treat the
// temporary as live, so we must zero it before entering that call.
// Today, this only happens for channel receive operations.
// (The other candidate would be map access, but map access
// returns a pointer to the result data instead of taking a pointer
// to be filled in.)
func (o *Order) copyExpr(n *Node, t *types.Type, clear bool) *Node {
v := o.newTemp(t, clear)
a := nod(OAS, v, n)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
o.out = append(o.out, a)
return v
}
// cheapExpr returns a cheap version of n.
// The definition of cheap is that n is a variable or constant.
// If not, cheapExpr allocates a new tmp, emits tmp = n,
// and then returns tmp.
func (o *Order) cheapExpr(n *Node) *Node {
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
if n == nil {
return nil
}
switch n.Op {
case ONAME, OLITERAL:
return n
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
case OLEN, OCAP:
l := o.cheapExpr(n.Left)
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
if l == n.Left {
return n
}
a := n.copy()
a.Orig = a
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
a.Left = l
return typecheck(a, Erv)
}
return o.copyExpr(n, n.Type, false)
}
// safeExpr returns a safe version of n.
// The definition of safe is that n can appear multiple times
// without violating the semantics of the original program,
// and that assigning to the safe version has the same effect
// as assigning to the original n.
//
// The intended use is to apply to x when rewriting x += y into x = x + y.
func (o *Order) safeExpr(n *Node) *Node {
switch n.Op {
case ONAME, OLITERAL:
return n
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
case ODOT, OLEN, OCAP:
l := o.safeExpr(n.Left)
if l == n.Left {
return n
}
a := n.copy()
a.Orig = a
a.Left = l
return typecheck(a, Erv)
case ODOTPTR, OIND:
l := o.cheapExpr(n.Left)
if l == n.Left {
return n
}
a := n.copy()
a.Orig = a
a.Left = l
return typecheck(a, Erv)
case OINDEX, OINDEXMAP:
var l *Node
if n.Left.Type.IsArray() {
l = o.safeExpr(n.Left)
} else {
l = o.cheapExpr(n.Left)
}
r := o.cheapExpr(n.Right)
if l == n.Left && r == n.Right {
return n
}
a := n.copy()
a.Orig = a
a.Left = l
a.Right = r
return typecheck(a, Erv)
default:
Fatalf("ordersafeexpr %v", n.Op)
return nil // not reached
}
}
// Isaddrokay reports whether it is okay to pass n's address to runtime routines.
// Taking the address of a variable makes the liveness and optimization analyses
// lose track of where the variable's lifetime ends. To avoid hurting the analyses
// of ordinary stack variables, those are not 'isaddrokay'. Temporaries are okay,
// because we emit explicit VARKILL instructions marking the end of those
// temporaries' lifetimes.
func isaddrokay(n *Node) bool {
cmd/compile: move Node.Class to flags Put it at position zero, since it is fairly hot. This shrinks gc.Node into a smaller size class on 64 bit systems. name old time/op new time/op delta Template 193ms ± 5% 192ms ± 3% ~ (p=0.353 n=94+93) Unicode 86.1ms ± 5% 85.0ms ± 4% -1.23% (p=0.000 n=95+98) GoTypes 546ms ± 3% 544ms ± 4% -0.40% (p=0.007 n=94+97) Compiler 2.56s ± 3% 2.54s ± 3% -0.67% (p=0.000 n=99+97) SSA 5.13s ± 2% 5.10s ± 3% -0.55% (p=0.000 n=94+98) Flate 122ms ± 6% 121ms ± 4% -0.75% (p=0.002 n=97+95) GoParser 144ms ± 5% 144ms ± 4% ~ (p=0.298 n=98+97) Reflect 348ms ± 4% 349ms ± 4% ~ (p=0.350 n=98+97) Tar 105ms ± 5% 104ms ± 5% ~ (p=0.154 n=96+98) XML 200ms ± 5% 198ms ± 4% -0.71% (p=0.015 n=97+98) [Geo mean] 330ms 328ms -0.52% name old user-time/op new user-time/op delta Template 229ms ±11% 224ms ± 7% -2.16% (p=0.001 n=100+87) Unicode 109ms ± 5% 109ms ± 6% ~ (p=0.897 n=96+91) GoTypes 712ms ± 4% 709ms ± 4% ~ (p=0.085 n=96+98) Compiler 3.41s ± 3% 3.36s ± 3% -1.43% (p=0.000 n=98+98) SSA 7.46s ± 3% 7.31s ± 3% -2.02% (p=0.000 n=100+99) Flate 145ms ± 6% 143ms ± 6% -1.11% (p=0.001 n=99+97) GoParser 177ms ± 5% 176ms ± 5% -0.78% (p=0.018 n=95+95) Reflect 432ms ± 7% 435ms ± 9% ~ (p=0.296 n=100+100) Tar 121ms ± 7% 121ms ± 5% ~ (p=0.072 n=100+95) XML 241ms ± 4% 239ms ± 5% ~ (p=0.085 n=97+99) [Geo mean] 413ms 410ms -0.73% name old alloc/op new alloc/op delta Template 38.4MB ± 0% 37.7MB ± 0% -1.85% (p=0.008 n=5+5) Unicode 30.1MB ± 0% 28.8MB ± 0% -4.09% (p=0.008 n=5+5) GoTypes 112MB ± 0% 110MB ± 0% -1.69% (p=0.008 n=5+5) Compiler 470MB ± 0% 461MB ± 0% -1.91% (p=0.008 n=5+5) SSA 1.13GB ± 0% 1.11GB ± 0% -1.70% (p=0.008 n=5+5) Flate 25.0MB ± 0% 24.6MB ± 0% -1.67% (p=0.008 n=5+5) GoParser 31.6MB ± 0% 31.1MB ± 0% -1.66% (p=0.008 n=5+5) Reflect 77.1MB ± 0% 75.8MB ± 0% -1.69% (p=0.008 n=5+5) Tar 26.3MB ± 0% 25.7MB ± 0% -2.06% (p=0.008 n=5+5) XML 41.9MB ± 0% 41.1MB ± 0% -1.93% (p=0.008 n=5+5) [Geo mean] 73.5MB 72.0MB -2.03% name old allocs/op new allocs/op delta Template 383k ± 0% 383k ± 0% ~ (p=0.690 n=5+5) Unicode 343k ± 0% 343k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=0.310 n=5+5) Compiler 4.43M ± 0% 4.42M ± 0% -0.17% (p=0.008 n=5+5) SSA 9.85M ± 0% 9.85M ± 0% ~ (p=0.310 n=5+5) Flate 236k ± 0% 236k ± 1% ~ (p=0.841 n=5+5) GoParser 320k ± 0% 320k ± 0% ~ (p=0.421 n=5+5) Reflect 988k ± 0% 987k ± 0% ~ (p=0.690 n=5+5) Tar 252k ± 0% 251k ± 0% ~ (p=0.095 n=5+5) XML 399k ± 0% 399k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 741k 740k -0.07% Change-Id: I9e952b58a98e30a12494304db9ce50d0a85e459c Reviewed-on: https://go-review.googlesource.com/41797 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
2017-04-25 18:14:12 -07:00
return islvalue(n) && (n.Op != ONAME || n.Class() == PEXTERN || n.IsAutoTmp())
}
// addrTemp ensures that n is okay to pass by address to runtime routines.
// If the original argument n is not okay, addrTemp creates a tmp, emits
// tmp = n, and then returns tmp.
// The result of addrTemp MUST be assigned back to n, e.g.
// n.Left = o.addrTemp(n.Left)
func (o *Order) addrTemp(n *Node) *Node {
if consttype(n) > 0 {
cmd/compile: convert constants to interfaces without allocating The order pass is responsible for ensuring that values passed to runtime functions, including convT2E/convT2I, are addressable. Prior to this CL, this was always accomplished by creating a temp, which frequently escaped to the heap, causing allocations, perhaps most notably in code like: fmt.Println(1, 2, 3) // allocates three times None of the runtime routines modify the contents of the pointers they receive, so in the case of constants, instead of creating a temp value, we can create a static value. (Marking the static value as read-only provides protection against accidental attempts by the runtime to modify the constant data.) This improves code generation for code like: panic("abc") c <- 2 // c is a chan int which can now simply refer to "abc" and 2, rather than going by way of a temporary. It also allows us to optimize convT2E/convT2I, by recognizing static readonly values and directly constructing the interface. This CL adds ~0.5% to binary size, despite decreasing the size of many functions, because it also adds many static symbols. This binary size regression could be recovered in future (but currently unplanned) work. There is a lot of content-duplication in these symbols; this statement generates six new symbols, three containing an int 1 and three containing a pointer to the string "a": fmt.Println(1, 1, 1, "a", "a", "a") These symbols could be made content-addressable. Furthermore, these symbols are small, so the alignment and naming overhead is large. As with the go.strings section, these symbols could be hidden and have their alignment reduced. The changes to test/live.go make it impossible (at least with current optimization techniques) to place the values being passed to the runtime in static symbols, preserving autotmp creation. Fixes #18704 Benchmarks from fmt and go-kit's logging package: github.com/go-kit/kit/log name old time/op new time/op delta JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10) JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10) Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9) OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10) TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9) TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10) LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9) LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10) NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10) NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10) ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10) ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10) JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10) Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10) TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10) TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10) LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10) ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10) JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10) Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10) ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) fmt name old time/op new time/op delta SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7) SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10) SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10) SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10) SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10) SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9) SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8) SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10) SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10) SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10) SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10) SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9) SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10) SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10) SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10) SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10) ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10) FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10) FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10) FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10) ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8) ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10) ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9) name old alloc/op new alloc/op delta SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfEmpty-8 0.00B 0.00B ~ (all equal) SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10) SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10) SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10) SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10) SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10) SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal) SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal) SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal) ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10) FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal) ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10) ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfEmpty-8 0.00 0.00 ~ (all equal) SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal) SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal) ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00 0.00 ~ (all equal) ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal) ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5 Reviewed-on: https://go-review.googlesource.com/35554 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
// TODO: expand this to all static composite literal nodes?
n = defaultlit(n, nil)
dowidth(n.Type)
vstat := staticname(n.Type)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
vstat.Name.SetReadonly(true)
cmd/compile: convert constants to interfaces without allocating The order pass is responsible for ensuring that values passed to runtime functions, including convT2E/convT2I, are addressable. Prior to this CL, this was always accomplished by creating a temp, which frequently escaped to the heap, causing allocations, perhaps most notably in code like: fmt.Println(1, 2, 3) // allocates three times None of the runtime routines modify the contents of the pointers they receive, so in the case of constants, instead of creating a temp value, we can create a static value. (Marking the static value as read-only provides protection against accidental attempts by the runtime to modify the constant data.) This improves code generation for code like: panic("abc") c <- 2 // c is a chan int which can now simply refer to "abc" and 2, rather than going by way of a temporary. It also allows us to optimize convT2E/convT2I, by recognizing static readonly values and directly constructing the interface. This CL adds ~0.5% to binary size, despite decreasing the size of many functions, because it also adds many static symbols. This binary size regression could be recovered in future (but currently unplanned) work. There is a lot of content-duplication in these symbols; this statement generates six new symbols, three containing an int 1 and three containing a pointer to the string "a": fmt.Println(1, 1, 1, "a", "a", "a") These symbols could be made content-addressable. Furthermore, these symbols are small, so the alignment and naming overhead is large. As with the go.strings section, these symbols could be hidden and have their alignment reduced. The changes to test/live.go make it impossible (at least with current optimization techniques) to place the values being passed to the runtime in static symbols, preserving autotmp creation. Fixes #18704 Benchmarks from fmt and go-kit's logging package: github.com/go-kit/kit/log name old time/op new time/op delta JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10) JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10) Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9) OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10) TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9) TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10) LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9) LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10) NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10) NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10) ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10) ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10) name old alloc/op new alloc/op delta JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10) JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10) Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10) TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10) TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10) LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10) NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10) ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10) ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10) name old allocs/op new allocs/op delta JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10) JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10) Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10) ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10) ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10) fmt name old time/op new time/op delta SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7) SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10) SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10) SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10) SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10) SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9) SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8) SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10) SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10) SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10) SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10) SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9) SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10) SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10) SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10) SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10) ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10) FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10) FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10) FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10) ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8) ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10) ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9) name old alloc/op new alloc/op delta SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfEmpty-8 0.00B 0.00B ~ (all equal) SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10) SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10) SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10) SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10) SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10) SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10) SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal) SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal) SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal) ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10) FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal) ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10) ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfEmpty-8 0.00 0.00 ~ (all equal) SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10) SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal) SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal) ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) FprintIntNoAlloc-8 0.00 0.00 ~ (all equal) ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal) ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal) Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5 Reviewed-on: https://go-review.googlesource.com/35554 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
var out []*Node
staticassign(vstat, n, &out)
if out != nil {
Fatalf("staticassign of const generated code: %+v", n)
}
vstat = typecheck(vstat, Erv)
return vstat
}
if isaddrokay(n) {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
return o.copyExpr(n, n.Type, false)
}
// mapKeyTemp prepares n to be a key in a map runtime call and returns n.
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
// It should only be used for map runtime calls which have *_fast* versions.
func (o *Order) mapKeyTemp(t *types.Type, n *Node) *Node {
// Most map calls need to take the address of the key.
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
// Exception: map*_fast* calls. See golang.org/issue/19015.
if mapfast(t) == mapslow {
return o.addrTemp(n)
}
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
return n
}
type ordermarker int
// Marktemp returns the top of the temporary variable stack.
func (o *Order) markTemp() ordermarker {
return ordermarker(len(o.temp))
}
// Poptemp pops temporaries off the stack until reaching the mark,
// which must have been returned by marktemp.
func (o *Order) popTemp(mark ordermarker) {
o.temp = o.temp[:mark]
}
// Cleantempnopop emits VARKILL and if needed VARLIVE instructions
// to *out for each temporary above the mark on the temporary stack.
// It does not pop the temporaries from the stack.
func (o *Order) cleanTempNoPop(mark ordermarker) []*Node {
var out []*Node
for i := len(o.temp) - 1; i >= int(mark); i-- {
n := o.temp[i]
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if n.Name.Keepalive() {
n.Name.SetKeepalive(false)
n.SetAddrtaken(true) // ensure SSA keeps the n variable
live := nod(OVARLIVE, n, nil)
live = typecheck(live, Etop)
out = append(out, live)
cmd/compile: recognize Syscall-like functions for liveness analysis Consider this code: func f(*int) func g() { p := new(int) f(p) } where f is an assembly function. In general liveness analysis assumes that during the call to f, p is dead in this frame. If f has retained p, p will be found alive in f's frame and keep the new(int) from being garbage collected. This is all correct and works. We use the Go func declaration for f to give the assembly function liveness information (the arguments are assumed live for the entire call). Now consider this code: func h1() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) } Here syscall.Syscall is taking the place of f, but because its arguments are uintptr, the liveness analysis and the garbage collector ignore them. Since p is no longer live in h once the call starts, if the garbage collector scans the stack while the system call is blocked, it will find no reference to the new(int) and reclaim it. If the kernel is going to write to *p once the call finishes, reclaiming the memory is a mistake. We can't change the arguments or the liveness information for syscall.Syscall itself, both for compatibility and because sometimes the arguments really are integers, and the garbage collector will get quite upset if it finds an integer where it expects a pointer. The problem is that these arguments are fundamentally untyped. The solution we have taken in the syscall package's wrappers in past releases is to insert a call to a dummy function named "use", to make it look like the argument is live during the call to syscall.Syscall: func h2() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) use(unsafe.Pointer(p)) } Keeping p alive during the call means that if the garbage collector scans the stack during the system call now, it will find the reference to p. Unfortunately, this approach is not available to users outside syscall, because 'use' is unexported, and people also have to realize they need to use it and do so. There is much existing code using syscall.Syscall without a 'use'-like function. That code will fail very occasionally in mysterious ways (see #13372). This CL fixes all that existing code by making the compiler do the right thing automatically, without any code modifications. That is, it takes h1 above, which is incorrect code today, and makes it correct code. Specifically, if the compiler sees a foreign func definition (one without a body) that has uintptr arguments, it marks those arguments as "unsafe uintptrs". If it later sees the function being called with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x as having escaped, and it makes sure to hold x in a live temporary variable until the call returns, so that the garbage collector cannot reclaim whatever heap memory x points to. For now I am leaving the explicit calls to use in package syscall, but they can be removed early in a future cycle (likely Go 1.7). The rule has no effect on escape analysis, only on liveness analysis. Fixes #13372. Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500 Reviewed-on: https://go-review.googlesource.com/18584 Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
}
kill := nod(OVARKILL, n, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
kill = typecheck(kill, Etop)
out = append(out, kill)
}
return out
}
// cleanTemp emits VARKILL instructions for each temporary above the
// mark on the temporary stack and removes them from the stack.
func (o *Order) cleanTemp(top ordermarker) {
o.out = append(o.out, o.cleanTempNoPop(top)...)
o.popTemp(top)
}
// stmtList orders each of the statements in the list.
func (o *Order) stmtList(l Nodes) {
for _, n := range l.Slice() {
o.stmt(n)
}
}
// orderBlock orders the block of statements in n into a new slice,
// and then replaces the old slice in n with the new slice.
func orderBlock(n *Nodes) {
var order Order
mark := order.markTemp()
order.stmtList(*n)
order.cleanTemp(mark)
n.Set(order.out)
}
// exprInPlace orders the side effects in *np and
// leaves them as the init list of the final *np.
// The result of exprInPlace MUST be assigned back to n, e.g.
// n.Left = o.exprInPlace(n.Left)
func (o *Order) exprInPlace(n *Node) *Node {
var order Order
n = order.expr(n, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
n = addinit(n, order.out)
// insert new temporaries from order
// at head of outer list.
o.temp = append(o.temp, order.temp...)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
// orderStmtInPlace orders the side effects of the single statement *np
// and replaces it with the resulting statement list.
// The result of orderStmtInPlace MUST be assigned back to n, e.g.
// n.Left = orderStmtInPlace(n.Left)
func orderStmtInPlace(n *Node) *Node {
var order Order
mark := order.markTemp()
order.stmt(n)
order.cleanTemp(mark)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return liststmt(order.out)
}
// init moves n's init list to o.out.
func (o *Order) init(n *Node) {
if n.mayBeShared() {
// For concurrency safety, don't mutate potentially shared nodes.
// First, ensure that no work is required here.
if n.Ninit.Len() > 0 {
Fatalf("orderinit shared node with ninit")
}
return
}
o.stmtList(n.Ninit)
n.Ninit.Set(nil)
}
// Ismulticall reports whether the list l is f() for a multi-value function.
// Such an f() could appear as the lone argument to a multi-arg function.
func ismulticall(l Nodes) bool {
// one arg only
if l.Len() != 1 {
return false
}
n := l.First()
// must be call
switch n.Op {
default:
return false
case OCALLFUNC, OCALLMETH, OCALLINTER:
// call must return multiple values
return n.Left.Type.NumResults() > 1
}
}
// copyRet emits t1, t2, ... = n, where n is a function call,
// and then returns the list t1, t2, ....
func (o *Order) copyRet(n *Node) []*Node {
if !n.Type.IsFuncArgStruct() {
Fatalf("copyret %v %d", n.Type, n.Left.Type.NumResults())
}
var l1, l2 []*Node
for _, f := range n.Type.Fields().Slice() {
tmp := temp(f.Type)
l1 = append(l1, tmp)
l2 = append(l2, tmp)
}
as := nod(OAS2, nil, nil)
as.List.Set(l1)
as.Rlist.Set1(n)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
as = typecheck(as, Etop)
o.stmt(as)
return l2
}
// callArgs orders the list of call arguments *l.
func (o *Order) callArgs(l *Nodes) {
if ismulticall(*l) {
// return f() where f() is multiple values.
l.Set(o.copyRet(l.First()))
} else {
o.exprList(*l)
}
}
// call orders the call expression n.
// n.Op is OCALLMETH/OCALLFUNC/OCALLINTER or a builtin like OCOPY.
func (o *Order) call(n *Node) {
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil) // ODDDARG temp
o.callArgs(&n.List)
cmd/compile: recognize Syscall-like functions for liveness analysis Consider this code: func f(*int) func g() { p := new(int) f(p) } where f is an assembly function. In general liveness analysis assumes that during the call to f, p is dead in this frame. If f has retained p, p will be found alive in f's frame and keep the new(int) from being garbage collected. This is all correct and works. We use the Go func declaration for f to give the assembly function liveness information (the arguments are assumed live for the entire call). Now consider this code: func h1() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) } Here syscall.Syscall is taking the place of f, but because its arguments are uintptr, the liveness analysis and the garbage collector ignore them. Since p is no longer live in h once the call starts, if the garbage collector scans the stack while the system call is blocked, it will find no reference to the new(int) and reclaim it. If the kernel is going to write to *p once the call finishes, reclaiming the memory is a mistake. We can't change the arguments or the liveness information for syscall.Syscall itself, both for compatibility and because sometimes the arguments really are integers, and the garbage collector will get quite upset if it finds an integer where it expects a pointer. The problem is that these arguments are fundamentally untyped. The solution we have taken in the syscall package's wrappers in past releases is to insert a call to a dummy function named "use", to make it look like the argument is live during the call to syscall.Syscall: func h2() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) use(unsafe.Pointer(p)) } Keeping p alive during the call means that if the garbage collector scans the stack during the system call now, it will find the reference to p. Unfortunately, this approach is not available to users outside syscall, because 'use' is unexported, and people also have to realize they need to use it and do so. There is much existing code using syscall.Syscall without a 'use'-like function. That code will fail very occasionally in mysterious ways (see #13372). This CL fixes all that existing code by making the compiler do the right thing automatically, without any code modifications. That is, it takes h1 above, which is incorrect code today, and makes it correct code. Specifically, if the compiler sees a foreign func definition (one without a body) that has uintptr arguments, it marks those arguments as "unsafe uintptrs". If it later sees the function being called with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x as having escaped, and it makes sure to hold x in a live temporary variable until the call returns, so that the garbage collector cannot reclaim whatever heap memory x points to. For now I am leaving the explicit calls to use in package syscall, but they can be removed early in a future cycle (likely Go 1.7). The rule has no effect on escape analysis, only on liveness analysis. Fixes #13372. Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500 Reviewed-on: https://go-review.googlesource.com/18584 Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
if n.Op != OCALLFUNC {
return
}
keepAlive := func(i int) {
// If the argument is really a pointer being converted to uintptr,
// arrange for the pointer to be kept alive until the call returns,
// by copying it into a temp and marking that temp
// still alive when we pop the temp stack.
xp := n.List.Addr(i)
for (*xp).Op == OCONVNOP && !(*xp).Type.IsUnsafePtr() {
xp = &(*xp).Left
}
x := *xp
if x.Type.IsUnsafePtr() {
x = o.copyExpr(x, x.Type, false)
x.Name.SetKeepalive(true)
*xp = x
}
}
for i, t := range n.Left.Type.Params().FieldSlice() {
// Check for "unsafe-uintptr" tag provided by escape analysis.
if t.Isddd() && !n.Isddd() {
if t.Note == uintptrEscapesTag {
for ; i < n.List.Len(); i++ {
keepAlive(i)
cmd/compile: recognize Syscall-like functions for liveness analysis Consider this code: func f(*int) func g() { p := new(int) f(p) } where f is an assembly function. In general liveness analysis assumes that during the call to f, p is dead in this frame. If f has retained p, p will be found alive in f's frame and keep the new(int) from being garbage collected. This is all correct and works. We use the Go func declaration for f to give the assembly function liveness information (the arguments are assumed live for the entire call). Now consider this code: func h1() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) } Here syscall.Syscall is taking the place of f, but because its arguments are uintptr, the liveness analysis and the garbage collector ignore them. Since p is no longer live in h once the call starts, if the garbage collector scans the stack while the system call is blocked, it will find no reference to the new(int) and reclaim it. If the kernel is going to write to *p once the call finishes, reclaiming the memory is a mistake. We can't change the arguments or the liveness information for syscall.Syscall itself, both for compatibility and because sometimes the arguments really are integers, and the garbage collector will get quite upset if it finds an integer where it expects a pointer. The problem is that these arguments are fundamentally untyped. The solution we have taken in the syscall package's wrappers in past releases is to insert a call to a dummy function named "use", to make it look like the argument is live during the call to syscall.Syscall: func h2() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) use(unsafe.Pointer(p)) } Keeping p alive during the call means that if the garbage collector scans the stack during the system call now, it will find the reference to p. Unfortunately, this approach is not available to users outside syscall, because 'use' is unexported, and people also have to realize they need to use it and do so. There is much existing code using syscall.Syscall without a 'use'-like function. That code will fail very occasionally in mysterious ways (see #13372). This CL fixes all that existing code by making the compiler do the right thing automatically, without any code modifications. That is, it takes h1 above, which is incorrect code today, and makes it correct code. Specifically, if the compiler sees a foreign func definition (one without a body) that has uintptr arguments, it marks those arguments as "unsafe uintptrs". If it later sees the function being called with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x as having escaped, and it makes sure to hold x in a live temporary variable until the call returns, so that the garbage collector cannot reclaim whatever heap memory x points to. For now I am leaving the explicit calls to use in package syscall, but they can be removed early in a future cycle (likely Go 1.7). The rule has no effect on escape analysis, only on liveness analysis. Fixes #13372. Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500 Reviewed-on: https://go-review.googlesource.com/18584 Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
}
}
} else {
if t.Note == unsafeUintptrTag || t.Note == uintptrEscapesTag {
keepAlive(i)
}
cmd/compile: recognize Syscall-like functions for liveness analysis Consider this code: func f(*int) func g() { p := new(int) f(p) } where f is an assembly function. In general liveness analysis assumes that during the call to f, p is dead in this frame. If f has retained p, p will be found alive in f's frame and keep the new(int) from being garbage collected. This is all correct and works. We use the Go func declaration for f to give the assembly function liveness information (the arguments are assumed live for the entire call). Now consider this code: func h1() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) } Here syscall.Syscall is taking the place of f, but because its arguments are uintptr, the liveness analysis and the garbage collector ignore them. Since p is no longer live in h once the call starts, if the garbage collector scans the stack while the system call is blocked, it will find no reference to the new(int) and reclaim it. If the kernel is going to write to *p once the call finishes, reclaiming the memory is a mistake. We can't change the arguments or the liveness information for syscall.Syscall itself, both for compatibility and because sometimes the arguments really are integers, and the garbage collector will get quite upset if it finds an integer where it expects a pointer. The problem is that these arguments are fundamentally untyped. The solution we have taken in the syscall package's wrappers in past releases is to insert a call to a dummy function named "use", to make it look like the argument is live during the call to syscall.Syscall: func h2() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) use(unsafe.Pointer(p)) } Keeping p alive during the call means that if the garbage collector scans the stack during the system call now, it will find the reference to p. Unfortunately, this approach is not available to users outside syscall, because 'use' is unexported, and people also have to realize they need to use it and do so. There is much existing code using syscall.Syscall without a 'use'-like function. That code will fail very occasionally in mysterious ways (see #13372). This CL fixes all that existing code by making the compiler do the right thing automatically, without any code modifications. That is, it takes h1 above, which is incorrect code today, and makes it correct code. Specifically, if the compiler sees a foreign func definition (one without a body) that has uintptr arguments, it marks those arguments as "unsafe uintptrs". If it later sees the function being called with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x as having escaped, and it makes sure to hold x in a live temporary variable until the call returns, so that the garbage collector cannot reclaim whatever heap memory x points to. For now I am leaving the explicit calls to use in package syscall, but they can be removed early in a future cycle (likely Go 1.7). The rule has no effect on escape analysis, only on liveness analysis. Fixes #13372. Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500 Reviewed-on: https://go-review.googlesource.com/18584 Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
}
}
}
// mapAssign appends n to o.out, introducing temporaries
// to make sure that all map assignments have the form m[k] = x.
// (Note: expr has already been called on n, so we know k is addressable.)
//
// If n is the multiple assignment form ..., m[k], ... = ..., x, ..., the rewrite is
// t1 = m
// t2 = k
// ...., t3, ... = ..., x, ...
// t1[t2] = t3
//
// The temporaries t1, t2 are needed in case the ... being assigned
// contain m or k. They are usually unnecessary, but in the unnecessary
// cases they are also typically registerizable, so not much harm done.
// And this only applies to the multiple-assignment form.
// We could do a more precise analysis if needed, like in walk.go.
func (o *Order) mapAssign(n *Node) {
switch n.Op {
default:
Fatalf("ordermapassign %v", n.Op)
case OAS, OASOP:
if n.Left.Op == OINDEXMAP {
// Make sure we evaluate the RHS before starting the map insert.
// We need to make sure the RHS won't panic. See issue 22881.
cmd/compile: avoid mapaccess at m[k]=append(m[k].. Currently rvalue m[k] is transformed during walk into: tmp1 := *mapaccess(m, k) tmp2 := append(tmp1, ...) *mapassign(m, k) = tmp2 However, this is suboptimal, as we could instead produce just: tmp := mapassign(m, k) *tmp := append(*tmp, ...) Optimization is possible only if during Order it may tell that m[k] is exactly the same at left and right part of assignment. It doesn't work: 1) m[f(k)] = append(m[f(k)], ...) 2) sink, m[k] = sink, append(m[k]...) 3) m[k] = append(..., m[k],...) Benchmark: name old time/op new time/op delta MapAppendAssign/Int32/256-8 33.5ns ± 3% 22.4ns ±10% -33.24% (p=0.000 n=16+18) MapAppendAssign/Int32/65536-8 68.2ns ± 6% 48.5ns ±29% -28.90% (p=0.000 n=20+20) MapAppendAssign/Int64/256-8 34.3ns ± 4% 23.3ns ± 5% -32.23% (p=0.000 n=17+18) MapAppendAssign/Int64/65536-8 65.9ns ± 7% 61.2ns ±19% -7.06% (p=0.002 n=18+20) MapAppendAssign/Str/256-8 116ns ±12% 79ns ±16% -31.70% (p=0.000 n=20+19) MapAppendAssign/Str/65536-8 134ns ±15% 111ns ±45% -16.95% (p=0.000 n=19+20) name old alloc/op new alloc/op delta MapAppendAssign/Int32/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=19+18) MapAppendAssign/Int32/65536-8 27.0B ± 0% 20.7B ±30% -23.33% (p=0.000 n=20+20) MapAppendAssign/Int64/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=20+17) MapAppendAssign/Int64/65536-8 27.0B ± 0% 27.0B ± 0% ~ (all equal) MapAppendAssign/Str/256-8 94.0B ± 0% 78.0B ± 0% -17.02% (p=0.000 n=20+16) MapAppendAssign/Str/65536-8 54.0B ± 0% 54.0B ± 0% ~ (all equal) Fixes #24364 Updates #5147 Change-Id: Id257d052b75b9a445b4885dc571bf06ce6f6b409 Reviewed-on: https://go-review.googlesource.com/100838 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-03-14 01:16:43 -07:00
if n.Right.Op == OAPPEND {
s := n.Right.List.Slice()[1:]
for i, n := range s {
s[i] = o.cheapExpr(n)
}
} else {
n.Right = o.cheapExpr(n.Right)
}
}
o.out = append(o.out, n)
case OAS2, OAS2DOTTYPE, OAS2MAPR, OAS2FUNC:
var post []*Node
for i, m := range n.List.Slice() {
switch {
case m.Op == OINDEXMAP:
if !m.Left.IsAutoTmp() {
m.Left = o.copyExpr(m.Left, m.Left.Type, false)
}
if !m.Right.IsAutoTmp() {
m.Right = o.copyExpr(m.Right, m.Right.Type, false)
}
fallthrough
case instrumenting && n.Op == OAS2FUNC && !m.isBlank():
t := o.newTemp(m.Type, false)
n.List.SetIndex(i, t)
a := nod(OAS, m, t)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
a = typecheck(a, Etop)
post = append(post, a)
}
}
o.out = append(o.out, n)
o.out = append(o.out, post...)
}
}
// stmt orders the statement n, appending to o.out.
// Temporaries created during the statement are cleaned
// up using VARKILL instructions as possible.
func (o *Order) stmt(n *Node) {
if n == nil {
return
}
lno := setlineno(n)
o.init(n)
switch n.Op {
default:
Fatalf("orderstmt %v", n.Op)
cmd/compile: recognize Syscall-like functions for liveness analysis Consider this code: func f(*int) func g() { p := new(int) f(p) } where f is an assembly function. In general liveness analysis assumes that during the call to f, p is dead in this frame. If f has retained p, p will be found alive in f's frame and keep the new(int) from being garbage collected. This is all correct and works. We use the Go func declaration for f to give the assembly function liveness information (the arguments are assumed live for the entire call). Now consider this code: func h1() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) } Here syscall.Syscall is taking the place of f, but because its arguments are uintptr, the liveness analysis and the garbage collector ignore them. Since p is no longer live in h once the call starts, if the garbage collector scans the stack while the system call is blocked, it will find no reference to the new(int) and reclaim it. If the kernel is going to write to *p once the call finishes, reclaiming the memory is a mistake. We can't change the arguments or the liveness information for syscall.Syscall itself, both for compatibility and because sometimes the arguments really are integers, and the garbage collector will get quite upset if it finds an integer where it expects a pointer. The problem is that these arguments are fundamentally untyped. The solution we have taken in the syscall package's wrappers in past releases is to insert a call to a dummy function named "use", to make it look like the argument is live during the call to syscall.Syscall: func h2() { p := new(int) syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p))) use(unsafe.Pointer(p)) } Keeping p alive during the call means that if the garbage collector scans the stack during the system call now, it will find the reference to p. Unfortunately, this approach is not available to users outside syscall, because 'use' is unexported, and people also have to realize they need to use it and do so. There is much existing code using syscall.Syscall without a 'use'-like function. That code will fail very occasionally in mysterious ways (see #13372). This CL fixes all that existing code by making the compiler do the right thing automatically, without any code modifications. That is, it takes h1 above, which is incorrect code today, and makes it correct code. Specifically, if the compiler sees a foreign func definition (one without a body) that has uintptr arguments, it marks those arguments as "unsafe uintptrs". If it later sees the function being called with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x as having escaped, and it makes sure to hold x in a live temporary variable until the call returns, so that the garbage collector cannot reclaim whatever heap memory x points to. For now I am leaving the explicit calls to use in package syscall, but they can be removed early in a future cycle (likely Go 1.7). The rule has no effect on escape analysis, only on liveness analysis. Fixes #13372. Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500 Reviewed-on: https://go-review.googlesource.com/18584 Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
case OVARKILL, OVARLIVE:
o.out = append(o.out, n)
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x This slows down more things than I expected, but it also speeds things up, and it reduces stack frame sizes and the load on the optimizer, so it's still likely a net win. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795) BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000) BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000) BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000) BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000) BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013) BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039) BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104) BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035) BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131) BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572) BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000) BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000) BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924) BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242) BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000) BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000) BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000) BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000) BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000) BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791) BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110) BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433) BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303) BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001) BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000) Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f Reviewed-on: https://go-review.googlesource.com/9721 Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
case OAS:
t := o.markTemp()
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, n.Left)
o.mapAssign(n)
o.cleanTemp(t)
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x This slows down more things than I expected, but it also speeds things up, and it reduces stack frame sizes and the load on the optimizer, so it's still likely a net win. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795) BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000) BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000) BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000) BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000) BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013) BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039) BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104) BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035) BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131) BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572) BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000) BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000) BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924) BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242) BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000) BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000) BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000) BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000) BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000) BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791) BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110) BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433) BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303) BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001) BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000) Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f Reviewed-on: https://go-review.googlesource.com/9721 Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
case OAS2,
OCLOSE,
OCOPY,
OPRINT,
OPRINTN,
ORECOVER,
ORECV:
t := o.markTemp()
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
o.exprList(n.List)
o.exprList(n.Rlist)
switch n.Op {
case OAS2:
o.mapAssign(n)
default:
o.out = append(o.out, n)
}
o.cleanTemp(t)
case OASOP:
t := o.markTemp()
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
if instrumenting || n.Left.Op == OINDEXMAP && (n.SubOp() == ODIV || n.SubOp() == OMOD) {
// Rewrite m[k] op= r into m[k] = m[k] op r so
// that we can ensure that if op panics
// because r is zero, the panic happens before
// the map assignment.
n.Left = o.safeExpr(n.Left)
l := treecopy(n.Left, src.NoXPos)
if l.Op == OINDEXMAP {
l.SetIndexMapLValue(false)
}
l = o.copyExpr(l, n.Left.Type, false)
n.Right = nod(n.SubOp(), l, n.Right)
n.Right = typecheck(n.Right, Erv)
n.Right = o.expr(n.Right, nil)
n.Op = OAS
n.ResetAux()
}
o.mapAssign(n)
o.cleanTemp(t)
// Special: make sure key is addressable if needed,
// and make sure OINDEXMAP is not copied out.
case OAS2MAPR:
t := o.markTemp()
o.exprList(n.List)
r := n.Rlist.First()
r.Left = o.expr(r.Left, nil)
r.Right = o.expr(r.Right, nil)
// See case OINDEXMAP below.
if r.Right.Op == OARRAYBYTESTR {
r.Right.Op = OARRAYBYTESTRTMP
}
r.Right = o.mapKeyTemp(r.Left.Type, r.Right)
o.okAs2(n)
o.cleanTemp(t)
// Special: avoid copy of func call n.Rlist.First().
case OAS2FUNC:
t := o.markTemp()
o.exprList(n.List)
o.call(n.Rlist.First())
o.as2(n)
o.cleanTemp(t)
// Special: use temporary variables to hold result,
// so that assertI2Tetc can take address of temporary.
// No temporary for blank assignment.
case OAS2DOTTYPE:
t := o.markTemp()
o.exprList(n.List)
n.Rlist.First().Left = o.expr(n.Rlist.First().Left, nil) // i in i.(T)
o.okAs2(n)
o.cleanTemp(t)
// Special: use temporary variables to hold result,
// so that chanrecv can take address of temporary.
case OAS2RECV:
t := o.markTemp()
o.exprList(n.List)
n.Rlist.First().Left = o.expr(n.Rlist.First().Left, nil) // arg to recv
ch := n.Rlist.First().Left.Type
tmp1 := o.newTemp(ch.Elem(), types.Haspointers(ch.Elem()))
tmp2 := o.newTemp(types.Types[TBOOL], false)
o.out = append(o.out, n)
r := nod(OAS, n.List.First(), tmp1)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Etop)
o.mapAssign(r)
r = okas(n.List.Second(), tmp2)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Etop)
o.mapAssign(r)
n.List.Set2(tmp1, tmp2)
o.cleanTemp(t)
// Special: does not save n onto out.
case OBLOCK, OEMPTY:
o.stmtList(n.List)
// Special: n->left is not an expression; save as is.
case OBREAK,
OCONTINUE,
ODCL,
ODCLCONST,
ODCLTYPE,
OFALL,
OGOTO,
OLABEL,
ORETJMP:
o.out = append(o.out, n)
// Special: handle call arguments.
case OCALLFUNC, OCALLINTER, OCALLMETH:
t := o.markTemp()
o.call(n)
o.out = append(o.out, n)
o.cleanTemp(t)
// Special: order arguments to inner call but not call itself.
case ODEFER, OPROC:
t := o.markTemp()
o.call(n.Left)
o.out = append(o.out, n)
o.cleanTemp(t)
case ODELETE:
t := o.markTemp()
n.List.SetFirst(o.expr(n.List.First(), nil))
n.List.SetSecond(o.expr(n.List.Second(), nil))
n.List.SetSecond(o.mapKeyTemp(n.List.First().Type, n.List.Second()))
o.out = append(o.out, n)
o.cleanTemp(t)
// Clean temporaries from condition evaluation at
// beginning of loop body and after for statement.
case OFOR:
t := o.markTemp()
n.Left = o.exprInPlace(n.Left)
n.Nbody.Prepend(o.cleanTempNoPop(t)...)
orderBlock(&n.Nbody)
n.Right = orderStmtInPlace(n.Right)
o.out = append(o.out, n)
o.cleanTemp(t)
// Clean temporaries from condition at
// beginning of both branches.
case OIF:
t := o.markTemp()
n.Left = o.exprInPlace(n.Left)
n.Nbody.Prepend(o.cleanTempNoPop(t)...)
n.Rlist.Prepend(o.cleanTempNoPop(t)...)
o.popTemp(t)
orderBlock(&n.Nbody)
orderBlock(&n.Rlist)
o.out = append(o.out, n)
// Special: argument will be converted to interface using convT2E
// so make sure it is an addressable temporary.
case OPANIC:
t := o.markTemp()
n.Left = o.expr(n.Left, nil)
if !n.Left.Type.IsInterface() {
n.Left = o.addrTemp(n.Left)
}
o.out = append(o.out, n)
o.cleanTemp(t)
case ORANGE:
// n.Right is the expression being ranged over.
// order it, and then make a copy if we need one.
// We almost always do, to ensure that we don't
// see any value changes made during the loop.
// Usually the copy is cheap (e.g., array pointer,
// chan, slice, string are all tiny).
// The exception is ranging over an array value
// (not a slice, not a pointer to array),
// which must make a copy to avoid seeing updates made during
// the range body. Ranging over an array value is uncommon though.
// Mark []byte(str) range expression to reuse string backing storage.
// It is safe because the storage cannot be mutated.
if n.Right.Op == OSTRARRAYBYTE {
n.Right.Op = OSTRARRAYBYTETMP
}
t := o.markTemp()
n.Right = o.expr(n.Right, nil)
orderBody := true
switch n.Type.Etype {
default:
Fatalf("orderstmt range %v", n.Type)
case TARRAY, TSLICE:
if n.List.Len() < 2 || n.List.Second().isBlank() {
// for i := range x will only use x once, to compute len(x).
// No need to copy it.
break
}
fallthrough
case TCHAN, TSTRING:
// chan, string, slice, array ranges use value multiple times.
// make copy.
r := n.Right
if r.Type.IsString() && r.Type != types.Types[TSTRING] {
r = nod(OCONV, r, nil)
r.Type = types.Types[TSTRING]
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
r = typecheck(r, Erv)
}
n.Right = o.copyExpr(r, r.Type, false)
case TMAP:
if isMapClear(n) {
// Preserve the body of the map clear pattern so it can
// be detected during walk. The loop body will not be used
// when optimizing away the range loop to a runtime call.
orderBody = false
break
}
// copy the map value in case it is a map literal.
// TODO(rsc): Make tmp = literal expressions reuse tmp.
// For maps tmp is just one word so it hardly matters.
r := n.Right
n.Right = o.copyExpr(r, r.Type, false)
// prealloc[n] is the temp for the iterator.
// hiter contains pointers and needs to be zeroed.
prealloc[n] = o.newTemp(hiter(n.Type), true)
}
o.exprListInPlace(n.List)
if orderBody {
orderBlock(&n.Nbody)
}
o.out = append(o.out, n)
o.cleanTemp(t)
case ORETURN:
o.callArgs(&n.List)
o.out = append(o.out, n)
// Special: clean case temporaries in each block entry.
// Select must enter one of its blocks, so there is no
// need for a cleaning at the end.
// Doubly special: evaluation order for select is stricter
// than ordinary expressions. Even something like p.c
// has to be hoisted into a temporary, so that it cannot be
// reordered after the channel evaluation for a different
// case (if p were nil, then the timing of the fault would
// give this away).
case OSELECT:
t := o.markTemp()
for _, n2 := range n.List.Slice() {
if n2.Op != OXCASE {
Fatalf("order select case %v", n2.Op)
}
r := n2.Left
setlineno(n2)
// Append any new body prologue to ninit.
// The next loop will insert ninit into nbody.
if n2.Ninit.Len() != 0 {
Fatalf("order select ninit")
}
if r == nil {
continue
}
switch r.Op {
default:
Dump("select case", r)
Fatalf("unknown op in select %v", r.Op)
// If this is case x := <-ch or case x, y := <-ch, the case has
// the ODCL nodes to declare x and y. We want to delay that
// declaration (and possible allocation) until inside the case body.
// Delete the ODCL nodes here and recreate them inside the body below.
case OSELRECV, OSELRECV2:
if r.Colas() {
i := 0
if r.Ninit.Len() != 0 && r.Ninit.First().Op == ODCL && r.Ninit.First().Left == r.Left {
i++
}
if i < r.Ninit.Len() && r.Ninit.Index(i).Op == ODCL && r.List.Len() != 0 && r.Ninit.Index(i).Left == r.List.First() {
i++
}
if i >= r.Ninit.Len() {
r.Ninit.Set(nil)
}
}
if r.Ninit.Len() != 0 {
dumplist("ninit", r.Ninit)
Fatalf("ninit on select recv")
}
// case x = <-c
// case x, ok = <-c
// r->left is x, r->ntest is ok, r->right is ORECV, r->right->left is c.
// r->left == N means 'case <-c'.
// c is always evaluated; x and ok are only evaluated when assigned.
r.Right.Left = o.expr(r.Right.Left, nil)
if r.Right.Left.Op != ONAME {
r.Right.Left = o.copyExpr(r.Right.Left, r.Right.Left.Type, false)
}
// Introduce temporary for receive and move actual copy into case body.
// avoids problems with target being addressed, as usual.
// NOTE: If we wanted to be clever, we could arrange for just one
// temporary per distinct type, sharing the temp among all receives
// with that temp. Similarly one ok bool could be shared among all
// the x,ok receives. Not worth doing until there's a clear need.
if r.Left != nil && r.Left.isBlank() {
r.Left = nil
}
if r.Left != nil {
// use channel element type for temporary to avoid conversions,
// such as in case interfacevalue = <-intchan.
// the conversion happens in the OAS instead.
tmp1 := r.Left
if r.Colas() {
tmp2 := nod(ODCL, tmp1, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
tmp2 = typecheck(tmp2, Etop)
n2.Ninit.Append(tmp2)
}
r.Left = o.newTemp(r.Right.Left.Type.Elem(), types.Haspointers(r.Right.Left.Type.Elem()))
tmp2 := nod(OAS, tmp1, r.Left)
tmp2 = typecheck(tmp2, Etop)
n2.Ninit.Append(tmp2)
}
if r.List.Len() != 0 && r.List.First().isBlank() {
r.List.Set(nil)
}
if r.List.Len() != 0 {
tmp1 := r.List.First()
if r.Colas() {
tmp2 := nod(ODCL, tmp1, nil)
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
tmp2 = typecheck(tmp2, Etop)
n2.Ninit.Append(tmp2)
}
r.List.Set1(o.newTemp(types.Types[TBOOL], false))
tmp2 := okas(tmp1, r.List.First())
tmp2 = typecheck(tmp2, Etop)
n2.Ninit.Append(tmp2)
}
orderBlock(&n2.Ninit)
case OSEND:
if r.Ninit.Len() != 0 {
dumplist("ninit", r.Ninit)
Fatalf("ninit on select send")
}
// case c <- x
// r->left is c, r->right is x, both are always evaluated.
r.Left = o.expr(r.Left, nil)
if !r.Left.IsAutoTmp() {
r.Left = o.copyExpr(r.Left, r.Left.Type, false)
}
r.Right = o.expr(r.Right, nil)
if !r.Right.IsAutoTmp() {
r.Right = o.copyExpr(r.Right, r.Right.Type, false)
}
}
}
// Now that we have accumulated all the temporaries, clean them.
// Also insert any ninit queued during the previous loop.
// (The temporary cleaning must follow that ninit work.)
for _, n3 := range n.List.Slice() {
orderBlock(&n3.Nbody)
n3.Nbody.Prepend(o.cleanTempNoPop(t)...)
// TODO(mdempsky): Is this actually necessary?
// walkselect appears to walk Ninit.
n3.Nbody.Prepend(n3.Ninit.Slice()...)
n3.Ninit.Set(nil)
}
o.out = append(o.out, n)
o.popTemp(t)
// Special: value being sent is passed as a pointer; make it addressable.
case OSEND:
t := o.markTemp()
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
if instrumenting {
// Force copying to the stack so that (chan T)(nil) <- x
// is still instrumented as a read of x.
n.Right = o.copyExpr(n.Right, n.Right.Type, false)
} else {
n.Right = o.addrTemp(n.Right)
}
o.out = append(o.out, n)
o.cleanTemp(t)
// TODO(rsc): Clean temporaries more aggressively.
// Note that because walkswitch will rewrite some of the
// switch into a binary search, this is not as easy as it looks.
// (If we ran that code here we could invoke orderstmt on
// the if-else chain instead.)
// For now just clean all the temporaries at the end.
// In practice that's fine.
case OSWITCH:
t := o.markTemp()
n.Left = o.expr(n.Left, nil)
for _, ncas := range n.List.Slice() {
if ncas.Op != OXCASE {
Fatalf("order switch case %v", ncas.Op)
}
o.exprListInPlace(ncas.List)
orderBlock(&ncas.Nbody)
}
o.out = append(o.out, n)
o.cleanTemp(t)
}
lineno = lno
}
// exprList orders the expression list l into o.
func (o *Order) exprList(l Nodes) {
s := l.Slice()
for i := range s {
s[i] = o.expr(s[i], nil)
}
}
// exprListInPlace orders the expression list l but saves
// the side effects on the individual expression ninit lists.
func (o *Order) exprListInPlace(l Nodes) {
s := l.Slice()
for i := range s {
s[i] = o.exprInPlace(s[i])
}
}
// prealloc[x] records the allocation to use for x.
var prealloc = map[*Node]*Node{}
// expr orders a single expression, appending side
// effects to o.out as needed.
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x This slows down more things than I expected, but it also speeds things up, and it reduces stack frame sizes and the load on the optimizer, so it's still likely a net win. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795) BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000) BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000) BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000) BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000) BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013) BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039) BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104) BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035) BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131) BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572) BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000) BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000) BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924) BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242) BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000) BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000) BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000) BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000) BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000) BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791) BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110) BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433) BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303) BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001) BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000) Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f Reviewed-on: https://go-review.googlesource.com/9721 Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
// If this is part of an assignment lhs = *np, lhs is given.
// Otherwise lhs == nil. (When lhs != nil it may be possible
// to avoid copying the result of the expression to a temporary.)
// The result of expr MUST be assigned back to n, e.g.
// n.Left = o.expr(n.Left, lhs)
func (o *Order) expr(n, lhs *Node) *Node {
if n == nil {
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
lno := setlineno(n)
o.init(n)
switch n.Op {
default:
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
o.exprList(n.List)
o.exprList(n.Rlist)
// Addition of strings turns into a function call.
// Allocate a temporary to hold the strings.
// Fewer than 5 strings use direct runtime helpers.
case OADDSTR:
o.exprList(n.List)
if n.List.Len() > 5 {
t := types.NewArray(types.Types[TSTRING], int64(n.List.Len()))
prealloc[n] = o.newTemp(t, false)
}
// Mark string(byteSlice) arguments to reuse byteSlice backing
// buffer during conversion. String concatenation does not
// memorize the strings for later use, so it is safe.
// However, we can do it only if there is at least one non-empty string literal.
// Otherwise if all other arguments are empty strings,
// concatstrings will return the reference to the temp string
// to the caller.
hasbyte := false
haslit := false
for _, n1 := range n.List.Slice() {
hasbyte = hasbyte || n1.Op == OARRAYBYTESTR
haslit = haslit || n1.Op == OLITERAL && len(n1.Val().U.(string)) != 0
}
if haslit && hasbyte {
for _, n2 := range n.List.Slice() {
if n2.Op == OARRAYBYTESTR {
n2.Op = OARRAYBYTESTRTMP
}
}
}
case OCMPSTR:
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
// Mark string(byteSlice) arguments to reuse byteSlice backing
// buffer during conversion. String comparison does not
// memorize the strings for later use, so it is safe.
if n.Left.Op == OARRAYBYTESTR {
n.Left.Op = OARRAYBYTESTRTMP
}
if n.Right.Op == OARRAYBYTESTR {
n.Right.Op = OARRAYBYTESTRTMP
}
// key must be addressable
case OINDEXMAP:
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
needCopy := false
if !n.IndexMapLValue() && instrumenting {
// Race detector needs the copy so it can
// call treecopy on the result.
needCopy = true
}
// For x = m[string(k)] where k is []byte, the allocation of
// backing bytes for the string can be avoided by reusing
// the []byte backing array. This is a special case that it
// would be nice to handle more generally, but because
// there are no []byte-keyed maps, this specific case comes
// up in important cases in practice. See issue 3512.
// Nothing can change the []byte we are not copying before
// the map index, because the map access is going to
// be forced to happen immediately following this
// conversion (by the ordercopyexpr a few lines below).
if !n.IndexMapLValue() && n.Right.Op == OARRAYBYTESTR {
n.Right.Op = OARRAYBYTESTRTMP
needCopy = true
}
n.Right = o.mapKeyTemp(n.Left.Type, n.Right)
if needCopy {
n = o.copyExpr(n, n.Type, false)
}
// concrete type (not interface) argument must be addressable
// temporary to pass to runtime.
case OCONVIFACE:
n.Left = o.expr(n.Left, nil)
if !n.Left.Type.IsInterface() {
n.Left = o.addrTemp(n.Left)
}
case OCONVNOP:
if n.Type.IsKind(TUNSAFEPTR) && n.Left.Type.IsKind(TUINTPTR) && (n.Left.Op == OCALLFUNC || n.Left.Op == OCALLINTER || n.Left.Op == OCALLMETH) {
// When reordering unsafe.Pointer(f()) into a separate
// statement, the conversion and function call must stay
// together. See golang.org/issue/15329.
o.init(n.Left)
o.call(n.Left)
if lhs == nil || lhs.Op != ONAME || instrumenting {
n = o.copyExpr(n, n.Type, false)
}
} else {
n.Left = o.expr(n.Left, nil)
}
case OANDAND, OOROR:
mark := o.markTemp()
n.Left = o.expr(n.Left, nil)
// Clean temporaries from first branch at beginning of second.
// Leave them on the stack so that they can be killed in the outer
// context in case the short circuit is taken.
n.Right = addinit(n.Right, o.cleanTempNoPop(mark))
n.Right = o.exprInPlace(n.Right)
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
case OCALLFUNC,
OCALLINTER,
OCALLMETH,
OCAP,
OCOMPLEX,
OCOPY,
OIMAG,
OLEN,
OMAKECHAN,
OMAKEMAP,
OMAKESLICE,
ONEW,
OREAL,
ORECOVER,
OSTRARRAYBYTE,
OSTRARRAYBYTETMP,
OSTRARRAYRUNE:
if isRuneCount(n) {
// len([]rune(s)) is rewritten to runtime.countrunes(s) later.
n.Left.Left = o.expr(n.Left.Left, nil)
} else {
o.call(n)
}
if lhs == nil || lhs.Op != ONAME || instrumenting {
n = o.copyExpr(n, n.Type, false)
cmd/internal/gc: avoid turning 'x = f()' into 'tmp = f(); x = tmp' for simple x This slows down more things than I expected, but it also speeds things up, and it reduces stack frame sizes and the load on the optimizer, so it's still likely a net win. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.98,1.03) 13.2s × (0.98,1.02) ~ (p=0.795) BenchmarkFannkuch11 4.41s × (1.00,1.00) 4.45s × (0.99,1.01) +0.88% (p=0.000) BenchmarkFmtFprintfEmpty 86.4ns × (0.99,1.01) 90.1ns × (0.95,1.05) +4.31% (p=0.000) BenchmarkFmtFprintfString 318ns × (0.96,1.07) 337ns × (0.98,1.03) +6.05% (p=0.000) BenchmarkFmtFprintfInt 332ns × (0.97,1.04) 320ns × (0.97,1.02) -3.42% (p=0.000) BenchmarkFmtFprintfIntInt 562ns × (0.96,1.04) 574ns × (0.96,1.06) +2.00% (p=0.013) BenchmarkFmtFprintfPrefixedInt 442ns × (0.96,1.06) 450ns × (0.97,1.05) +1.73% (p=0.039) BenchmarkFmtFprintfFloat 640ns × (0.99,1.02) 659ns × (0.99,1.03) +3.01% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.97,1.06) 2.21µs × (0.98,1.02) ~ (p=0.104) BenchmarkGobDecode 20.0ms × (0.98,1.03) 19.7ms × (0.97,1.04) -1.35% (p=0.035) BenchmarkGobEncode 17.8ms × (0.96,1.04) 18.0ms × (0.96,1.06) ~ (p=0.131) BenchmarkGzip 653ms × (0.99,1.02) 652ms × (0.99,1.01) ~ (p=0.572) BenchmarkGunzip 143ms × (0.99,1.02) 142ms × (1.00,1.01) -0.52% (p=0.005) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 108µs × (0.99,1.02) -1.90% (p=0.000) BenchmarkJSONEncode 40.0ms × (0.98,1.05) 41.5ms × (0.97,1.06) +3.89% (p=0.000) BenchmarkJSONDecode 118ms × (0.99,1.01) 118ms × (0.98,1.01) +0.69% (p=0.010) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.924) BenchmarkGoParse 8.43ms × (0.92,1.11) 8.56ms × (0.93,1.05) ~ (p=0.242) BenchmarkRegexpMatchEasy0_32 180ns × (0.91,1.07) 163ns × (1.00,1.00) -9.33% (p=0.000) BenchmarkRegexpMatchEasy0_1K 550ns × (0.98,1.02) 558ns × (0.99,1.01) +1.44% (p=0.000) BenchmarkRegexpMatchEasy1_32 152ns × (0.94,1.05) 139ns × (0.98,1.02) -8.51% (p=0.000) BenchmarkRegexpMatchEasy1_1K 909ns × (0.98,1.06) 868ns × (0.99,1.02) -4.52% (p=0.000) BenchmarkRegexpMatchMedium_32 262ns × (0.97,1.03) 253ns × (0.99,1.02) -3.31% (p=0.000) BenchmarkRegexpMatchMedium_1K 73.8µs × (0.98,1.04) 72.7µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchHard_32 3.87µs × (0.99,1.02) 3.87µs × (1.00,1.01) ~ (p=0.791) BenchmarkRegexpMatchHard_1K 118µs × (0.98,1.04) 117µs × (0.99,1.02) ~ (p=0.110) BenchmarkRevcomp 1.00s × (0.94,1.10) 0.99s × (0.94,1.09) ~ (p=0.433) BenchmarkTemplate 140ms × (0.97,1.04) 140ms × (0.99,1.01) ~ (p=0.303) BenchmarkTimeParse 622ns × (0.99,1.02) 625ns × (0.99,1.01) +0.51% (p=0.001) BenchmarkTimeFormat 731ns × (0.98,1.04) 719ns × (0.99,1.01) -1.66% (p=0.000) Change-Id: Ibc3edb59a178adafda50156f46a341f69a17d83f Reviewed-on: https://go-review.googlesource.com/9721 Reviewed-by: David Chase <drchase@google.com>
2015-04-30 20:35:47 -04:00
}
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
case OAPPEND:
// Check for append(x, make([]T, y)...) .
if isAppendOfMake(n) {
n.List.SetFirst(o.expr(n.List.First(), nil)) // order x
n.List.Second().Left = o.expr(n.List.Second().Left, nil) // order y
} else {
o.callArgs(&n.List)
}
if lhs == nil || lhs.Op != ONAME && !samesafeexpr(lhs, n.List.First()) {
n = o.copyExpr(n, n.Type, false)
cmd/internal/gc: optimize append + write barrier The code generated for x = append(x, v) is roughly: t := x if len(t)+1 > cap(t) { t = grow(t) } t[len(t)] = v len(t)++ x = t We used to generate this code as Go pseudocode during walk. Generate it instead as actual instructions during gen. Doing so lets us apply a few optimizations. The most important is that when, as in the above example, the source slice and the destination slice are the same, the code can instead do: t := x if len(t)+1 > cap(t) { t = grow(t) x = {base(t), len(t)+1, cap(t)} } else { len(x)++ } t[len(t)] = v That is, in the fast path that does not reallocate the array, only the updated length needs to be written back to x, not the array pointer and not the capacity. This is more like what you'd write by hand in C. It's faster in general, since the fast path elides two of the three stores, but it's especially faster when the form of x is such that the base pointer write would turn into a write barrier. No write, no barrier. name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023) Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090) FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038) FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219) FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002) FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231) GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000) GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000) Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055) Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034) HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468) JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190) JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887) Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167) GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014) RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000) RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000) Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808) Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000) TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062) TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524) See next CL for combination with a similar optimization for slice. The benchmarks that are slower in this CL are still faster overall with the combination of the two. Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e Reviewed-on: https://go-review.googlesource.com/9812 Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
}
case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
n.Left = o.expr(n.Left, nil)
low, high, max := n.SliceBounds()
low = o.expr(low, nil)
low = o.cheapExpr(low)
high = o.expr(high, nil)
high = o.cheapExpr(high)
max = o.expr(max, nil)
max = o.cheapExpr(max)
n.SetSliceBounds(low, high, max)
if lhs == nil || lhs.Op != ONAME && !samesafeexpr(lhs, n.Left) {
n = o.copyExpr(n, n.Type, false)
cmd/internal/gc: optimize slice + write barrier The code generated for a slice x[i:j] or x[i:j:k] computes the entire new slice (base, len, cap) and then uses it as the evaluation of the slice expression. If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are opportunities to avoid computing some of these fields. For x = x[0:i], we know that only the len is changing; base can be ignored completely, and cap can be left unmodified. For x = x[0:i:j], we know that only len and cap are changing; base can be ignored completely. For x = x[i:i], we know that the resulting cap is zero, and we don't adjust the base during a slice producing a zero-cap result, so again base can be ignored completely. No write to base, no write barrier. The old slice code was trying to work at a Go syntax level, mainly because that was how you wrote code just once instead of once per architecture. Now the compiler is factored a bit better and we can implement slice during code generation but still have one copy of the code. So the new code is working at that lower level. (It must, to update only parts of the result.) This CL by itself: name old mean new mean delta BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101) Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000) FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048) FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235) FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119) FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000) FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139) FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001) FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000) GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042) GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152) Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000) HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014) JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000) JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000) Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057) GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105) RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000) RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000) RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002) RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000) RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000) RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000) RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000) Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786) Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000) TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000) TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000) This CL and previous CL (append) combined: name old mean new mean delta BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638) Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474) FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004) FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078) FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000) FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002) FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000) FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000) FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305) GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237) GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000) Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101) Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001) HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003) JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000) JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000) Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090) GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000) RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000) RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000) RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000) RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012) RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000) RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000) Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998) Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000) TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011) TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223) Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc Reviewed-on: https://go-review.googlesource.com/9813 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
}
case OCLOSURE:
if n.Noescape() && n.Func.Closure.Func.Cvars.Len() > 0 {
prealloc[n] = o.newTemp(types.Types[TUINT8], false) // walk will fill in correct type
}
case OARRAYLIT, OSLICELIT, OCALLPART:
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
o.exprList(n.List)
o.exprList(n.Rlist)
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if n.Noescape() {
prealloc[n] = o.newTemp(types.Types[TUINT8], false) // walk will fill in correct type
}
case ODDDARG:
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if n.Noescape() {
// The ddd argument does not live beyond the call it is created for.
// Allocate a temporary that will be cleaned up when this statement
// completes. We could be more aggressive and try to arrange for it
// to be cleaned up when the call completes.
prealloc[n] = o.newTemp(n.Type.Elem(), false)
}
case ODOTTYPE, ODOTTYPE2:
n.Left = o.expr(n.Left, nil)
// TODO(rsc): The isfat is for consistency with componentgen and walkexpr.
// It needs to be removed in all three places.
// That would allow inlining x.(struct{*int}) the same as x.(*int).
if !isdirectiface(n.Type) || isfat(n.Type) || instrumenting {
n = o.copyExpr(n, n.Type, true)
}
case ORECV:
n.Left = o.expr(n.Left, nil)
n = o.copyExpr(n, n.Type, true)
case OEQ, ONE:
n.Left = o.expr(n.Left, nil)
n.Right = o.expr(n.Right, nil)
t := n.Left.Type
if t.IsStruct() || t.IsArray() {
// for complex comparisons, we need both args to be
// addressable so we can pass them to the runtime.
n.Left = o.addrTemp(n.Left)
n.Right = o.addrTemp(n.Right)
}
}
lineno = lno
cmd/compile: reduce use of **Node parameters Escape analysis has a hard time with tree-like structures (see #13493 and #14858). This is unlikely to change. As a result, when invoking a function that accepts a **Node parameter, we usually allocate a *Node on the heap. This happens a whole lot. This CL changes functions from taking a **Node to acting more like append: It both modifies the input and returns a replacement for it. Because of the cascading nature of escape analysis, in order to get the benefits, I had to modify almost all such functions. The remaining functions are in racewalk and the backend. I would be happy to update them as well in a separate CL. This CL was created by manually updating the function signatures and the directly impacted bits of code. The callsites were then automatically updated using a bespoke script: https://gist.github.com/josharian/046b1be7aceae244de39 For ease of reviewing and future understanding, this CL is also broken down into four CLs, mailed separately, which show the manual and the automated changes separately. They are CLs 20990, 20991, 20992, and 20993. Passes toolstash -cmp. name old time/op new time/op delta Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24) Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24) GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24) Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24) MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23) Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25) GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25) Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25) name old allocs/op new allocs/op delta Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25) Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24) GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25) Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25) name old text-bytes new text-bytes delta HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal) CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal) name old data-bytes new data-bytes delta HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal) CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal) CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal) Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475 Reviewed-on: https://go-review.googlesource.com/20959 Reviewed-by: Dave Cheney <dave@cheney.net> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
return n
}
// okas creates and returns an assignment of val to ok,
// including an explicit conversion if necessary.
func okas(ok, val *Node) *Node {
if !ok.isBlank() {
val = conv(val, ok.Type)
}
return nod(OAS, ok, val)
}
// as2 orders OAS2XXXX nodes. It creates temporaries to ensure left-to-right assignment.
// The caller should order the right-hand side of the assignment before calling orderas2.
// It rewrites,
// a, b, a = ...
// as
// tmp1, tmp2, tmp3 = ...
// a, b, a = tmp1, tmp2, tmp3
// This is necessary to ensure left to right assignment order.
func (o *Order) as2(n *Node) {
tmplist := []*Node{}
left := []*Node{}
for _, l := range n.List.Slice() {
if !l.isBlank() {
tmp := o.newTemp(l.Type, types.Haspointers(l.Type))
tmplist = append(tmplist, tmp)
left = append(left, l)
}
}
o.out = append(o.out, n)
as := nod(OAS2, nil, nil)
as.List.Set(left)
as.Rlist.Set(tmplist)
as = typecheck(as, Etop)
o.stmt(as)
ti := 0
for ni, l := range n.List.Slice() {
if !l.isBlank() {
n.List.SetIndex(ni, tmplist[ti])
ti++
}
}
}
// okAs2 orders OAS2 with ok.
// Just like as2, this also adds temporaries to ensure left-to-right assignment.
func (o *Order) okAs2(n *Node) {
var tmp1, tmp2 *Node
if !n.List.First().isBlank() {
typ := n.Rlist.First().Type
tmp1 = o.newTemp(typ, types.Haspointers(typ))
}
if !n.List.Second().isBlank() {
tmp2 = o.newTemp(types.Types[TBOOL], false)
}
o.out = append(o.out, n)
if tmp1 != nil {
r := nod(OAS, n.List.First(), tmp1)
r = typecheck(r, Etop)
o.mapAssign(r)
n.List.SetFirst(tmp1)
}
if tmp2 != nil {
r := okas(n.List.Second(), tmp2)
r = typecheck(r, Etop)
o.mapAssign(r)
n.List.SetSecond(tmp2)
}
}