2015-02-13 14:40:36 -05:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import (
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
"cmd/compile/internal/types"
|
[dev.link] cmd/compile, cmd/link: remove dead methods if type is not used in interface
Currently, a method of a reachable type is live if it matches a
method of a reachable interface. In fact, we only need to retain
the method if the type is actually converted to an interface. If
the type is never converted to an interface, there is no way to
call the method through an interface method call (but the type
descriptor could still be used, e.g. in calling
runtime.newobject).
A type can be used in an interface in two ways:
- directly converted to interface. (Any interface counts, as it
is possible to convert one interface to another.)
- obtained by reflection from a related type (e.g. obtaining an
interface of T from []T).
For the former, we let the compiler emit a marker on the type
descriptor symbol when it is converted to an interface. In the
linker, we only need to check methods of marked types.
For the latter, when the linker visits a marked type, it needs to
visit all its "child" types as marked (i.e. potentially could be
converted to interface).
This reduces binary size:
cmd/compile 18792016 18706096 (-0.5%)
cmd/go 14120572 13398948 (-5.1%)
Change-Id: I4465c7eeabf575f4dc84017214c610fa05ae31fd
Reviewed-on: https://go-review.googlesource.com/c/go/+/237298
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
2020-06-08 18:38:59 -04:00
|
|
|
"cmd/internal/obj"
|
2017-04-18 12:53:25 -07:00
|
|
|
"cmd/internal/objabi"
|
2016-04-06 12:01:40 -07:00
|
|
|
"cmd/internal/sys"
|
2018-03-26 21:18:27 +01:00
|
|
|
"encoding/binary"
|
2015-02-13 14:40:36 -05:00
|
|
|
"fmt"
|
2020-11-23 13:42:43 -08:00
|
|
|
"go/constant"
|
2015-02-13 14:40:36 -05:00
|
|
|
"strings"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// The constant is known to runtime.
|
2017-10-20 11:56:31 +01:00
|
|
|
const tmpstringbufsize = 32
|
2019-07-23 23:27:28 +05:30
|
|
|
const zeroValSize = 1024 // must match value of runtime/map.go:maxZero
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
func walk(fn *Node) {
|
|
|
|
|
Curfn = fn
|
2020-11-16 11:08:38 -05:00
|
|
|
errorsBefore := Errors()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.W != 0 {
|
2018-05-24 13:31:40 -07:00
|
|
|
s := fmt.Sprintf("\nbefore walk %v", Curfn.Func.Nname.Sym)
|
2016-03-04 13:16:48 -08:00
|
|
|
dumplist(s, Curfn.Nbody)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-02 17:34:42 -08:00
|
|
|
lno := lineno
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Final typecheck for any unused variables.
|
2016-02-25 10:35:19 -08:00
|
|
|
for i, ln := range fn.Func.Dcl {
|
2017-04-25 18:14:12 -07:00
|
|
|
if ln.Op == ONAME && (ln.Class() == PAUTO || ln.Class() == PAUTOHEAP) {
|
2018-11-18 08:34:38 -08:00
|
|
|
ln = typecheck(ln, ctxExpr|ctxAssign)
|
2016-02-25 10:35:19 -08:00
|
|
|
fn.Func.Dcl[i] = ln
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-06 06:10:25 +00:00
|
|
|
// Propagate the used flag for typeswitch variables up to the NONAME in its definition.
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
2017-04-27 15:17:57 -07:00
|
|
|
if ln.Op == ONAME && (ln.Class() == PAUTO || ln.Class() == PAUTOHEAP) && ln.Name.Defn != nil && ln.Name.Defn.Op == OTYPESW && ln.Name.Used() {
|
|
|
|
|
ln.Name.Defn.Left.Name.SetUsed(true)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
2017-04-27 15:17:57 -07:00
|
|
|
if ln.Op != ONAME || (ln.Class() != PAUTO && ln.Class() != PAUTOHEAP) || ln.Sym.Name[0] == '&' || ln.Name.Used() {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2016-02-25 10:35:19 -08:00
|
|
|
if defn := ln.Name.Defn; defn != nil && defn.Op == OTYPESW {
|
2017-04-27 15:17:57 -07:00
|
|
|
if defn.Left.Name.Used() {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2019-10-24 21:29:30 -04:00
|
|
|
yyerrorl(defn.Left.Pos, "%v declared but not used", ln.Sym)
|
2017-04-27 15:17:57 -07:00
|
|
|
defn.Left.Name.SetUsed(true) // suppress repeats
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2019-10-24 21:29:30 -04:00
|
|
|
yyerrorl(ln.Pos, "%v declared but not used", ln.Sym)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-02 17:34:42 -08:00
|
|
|
lineno = lno
|
2020-11-16 11:08:38 -05:00
|
|
|
if Errors() > errorsBefore {
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(Curfn.Nbody.Slice())
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.W != 0 {
|
2015-05-27 10:42:55 -04:00
|
|
|
s := fmt.Sprintf("after walk %v", Curfn.Func.Nname.Sym)
|
2016-03-04 13:16:48 -08:00
|
|
|
dumplist(s, Curfn.Nbody)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-09 10:38:45 -08:00
|
|
|
zeroResults()
|
2015-02-13 14:40:36 -05:00
|
|
|
heapmoves()
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.W != 0 && Curfn.Func.Enter.Len() > 0 {
|
2015-05-27 10:42:55 -04:00
|
|
|
s := fmt.Sprintf("enter %v", Curfn.Func.Nname.Sym)
|
2016-03-04 13:16:48 -08:00
|
|
|
dumplist(s, Curfn.Func.Enter)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
func walkstmtlist(s []*Node) {
|
|
|
|
|
for i := range s {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
s[i] = walkstmt(s[i])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-06 18:42:58 +11:00
|
|
|
func paramoutheap(fn *Node) bool {
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range fn.Func.Dcl {
|
2017-04-25 18:14:12 -07:00
|
|
|
switch ln.Class() {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
case PPARAMOUT:
|
cmd/compile: move some ONAME-specific flags from Node to Name
The IsClosureVar, IsOutputParamHeapAddr, Assigned, Addrtaken,
InlFormal, and InlLocal flags are only interesting for ONAME nodes, so
it's better to set these flags on Name.flags instead of Node.flags.
Two caveats though:
1. Previously, we would set Assigned and Addrtaken on the entire
expression tree involved in an assignment or addressing operation.
However, the rest of the compiler only actually cares about knowing
whether the underlying ONAME (if any) was assigned/addressed.
2. This actually requires bumping Name.flags from bitset8 to bitset16,
whereas it doesn't allow shrinking Node.flags any. However, Name has
some trailing padding bytes, so expanding Name.flags doesn't cost any
memory.
Passes toolstash-check.
Change-Id: I7775d713566a38d5b9723360b1659b79391744c2
Reviewed-on: https://go-review.googlesource.com/c/go/+/200898
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-10-12 16:21:55 -07:00
|
|
|
if ln.isParamStackCopy() || ln.Name.Addrtaken() {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
case PAUTO:
|
2015-02-13 14:40:36 -05:00
|
|
|
// stop early - parameters are over
|
2015-03-06 18:42:58 +11:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-06 18:42:58 +11:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
// The result of walkstmt MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = walkstmt(n.Left)
|
|
|
|
|
func walkstmt(n *Node) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n == nil {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
setlineno(n)
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.Ninit.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
|
|
|
|
if n.Op == ONAME {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("%v is not a top level statement", n.Sym)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("%v is not a top level statement", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
Dump("nottop", n)
|
|
|
|
|
|
|
|
|
|
case OAS,
|
|
|
|
|
OASOP,
|
|
|
|
|
OAS2,
|
|
|
|
|
OAS2DOTTYPE,
|
|
|
|
|
OAS2RECV,
|
|
|
|
|
OAS2FUNC,
|
|
|
|
|
OAS2MAPR,
|
|
|
|
|
OCLOSE,
|
|
|
|
|
OCOPY,
|
|
|
|
|
OCALLMETH,
|
|
|
|
|
OCALLINTER,
|
|
|
|
|
OCALL,
|
|
|
|
|
OCALLFUNC,
|
|
|
|
|
ODELETE,
|
|
|
|
|
OSEND,
|
|
|
|
|
OPRINT,
|
|
|
|
|
OPRINTN,
|
|
|
|
|
OPANIC,
|
|
|
|
|
OEMPTY,
|
2015-04-03 12:23:28 -04:00
|
|
|
ORECOVER,
|
|
|
|
|
OGETG:
|
2017-04-25 18:02:43 -07:00
|
|
|
if n.Typecheck() == 0 {
|
2016-08-31 15:22:36 -07:00
|
|
|
Fatalf("missing typecheck: %+v", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
wascopy := n.Op == OCOPY
|
2015-02-23 16:07:24 -05:00
|
|
|
init := n.Ninit
|
2016-03-08 15:10:26 -08:00
|
|
|
n.Ninit.Set(nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkexpr(n, &init)
|
|
|
|
|
n = addinit(n, init.Slice())
|
|
|
|
|
if wascopy && n.Op == OCONVNOP {
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Op = OEMPTY // don't leave plain values as statements.
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-07 21:34:04 +02:00
|
|
|
// special case for a receive where we throw away
|
2015-02-13 14:40:36 -05:00
|
|
|
// the value received.
|
|
|
|
|
case ORECV:
|
2017-04-25 18:02:43 -07:00
|
|
|
if n.Typecheck() == 0 {
|
2016-08-31 15:22:36 -07:00
|
|
|
Fatalf("missing typecheck: %+v", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
init := n.Ninit
|
2016-03-08 15:10:26 -08:00
|
|
|
n.Ninit.Set(nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, &init)
|
2017-03-18 15:55:41 +00:00
|
|
|
n = mkcall1(chanfn("chanrecv1", 2, n.Left.Type), nil, &init, n.Left, nodnil())
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkexpr(n, &init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = addinit(n, init.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OBREAK,
|
|
|
|
|
OCONTINUE,
|
|
|
|
|
OFALL,
|
|
|
|
|
OGOTO,
|
|
|
|
|
OLABEL,
|
|
|
|
|
ODCLCONST,
|
|
|
|
|
ODCLTYPE,
|
|
|
|
|
OCHECKNIL,
|
2018-10-06 08:56:03 -07:00
|
|
|
OVARDEF,
|
cmd/compile: recognize Syscall-like functions for liveness analysis
Consider this code:
func f(*int)
func g() {
p := new(int)
f(p)
}
where f is an assembly function.
In general liveness analysis assumes that during the call to f, p is dead
in this frame. If f has retained p, p will be found alive in f's frame and keep
the new(int) from being garbage collected. This is all correct and works.
We use the Go func declaration for f to give the assembly function
liveness information (the arguments are assumed live for the entire call).
Now consider this code:
func h1() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
}
Here syscall.Syscall is taking the place of f, but because its arguments
are uintptr, the liveness analysis and the garbage collector ignore them.
Since p is no longer live in h once the call starts, if the garbage collector
scans the stack while the system call is blocked, it will find no reference
to the new(int) and reclaim it. If the kernel is going to write to *p once
the call finishes, reclaiming the memory is a mistake.
We can't change the arguments or the liveness information for
syscall.Syscall itself, both for compatibility and because sometimes the
arguments really are integers, and the garbage collector will get quite upset
if it finds an integer where it expects a pointer. The problem is that
these arguments are fundamentally untyped.
The solution we have taken in the syscall package's wrappers in past
releases is to insert a call to a dummy function named "use", to make
it look like the argument is live during the call to syscall.Syscall:
func h2() {
p := new(int)
syscall.Syscall(1, 2, 3, uintptr(unsafe.Pointer(p)))
use(unsafe.Pointer(p))
}
Keeping p alive during the call means that if the garbage collector
scans the stack during the system call now, it will find the reference to p.
Unfortunately, this approach is not available to users outside syscall,
because 'use' is unexported, and people also have to realize they need
to use it and do so. There is much existing code using syscall.Syscall
without a 'use'-like function. That code will fail very occasionally in
mysterious ways (see #13372).
This CL fixes all that existing code by making the compiler do the right
thing automatically, without any code modifications. That is, it takes h1
above, which is incorrect code today, and makes it correct code.
Specifically, if the compiler sees a foreign func definition (one
without a body) that has uintptr arguments, it marks those arguments
as "unsafe uintptrs". If it later sees the function being called
with uintptr(unsafe.Pointer(x)) as an argument, it arranges to mark x
as having escaped, and it makes sure to hold x in a live temporary
variable until the call returns, so that the garbage collector cannot
reclaim whatever heap memory x points to.
For now I am leaving the explicit calls to use in package syscall,
but they can be removed early in a future cycle (likely Go 1.7).
The rule has no effect on escape analysis, only on liveness analysis.
Fixes #13372.
Change-Id: I2addb83f70d08db08c64d394f9d06ff0a063c500
Reviewed-on: https://go-review.googlesource.com/18584
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-01-13 00:46:28 -05:00
|
|
|
OVARKILL,
|
|
|
|
|
OVARLIVE:
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
case ODCL:
|
|
|
|
|
v := n.Left
|
2017-04-25 18:14:12 -07:00
|
|
|
if v.Class() == PAUTOHEAP {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if compiling_runtime {
|
2019-10-14 12:16:52 -07:00
|
|
|
yyerror("%v escapes to heap, not allowed in runtime", v)
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
}
|
|
|
|
|
if prealloc[v] == nil {
|
|
|
|
|
prealloc[v] = callnew(v.Type)
|
|
|
|
|
}
|
2016-10-28 11:42:40 -07:00
|
|
|
nn := nod(OAS, v.Name.Param.Heapaddr, prealloc[v])
|
2017-02-27 19:56:38 +02:00
|
|
|
nn.SetColas(true)
|
2018-11-18 08:34:38 -08:00
|
|
|
nn = typecheck(nn, ctxStmt)
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
return walkstmt(nn)
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
case OBLOCK:
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.List.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OCASE:
|
2019-09-17 18:32:04 -07:00
|
|
|
yyerror("case statement out of place")
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ODEFER:
|
2019-10-16 20:41:53 +00:00
|
|
|
Curfn.Func.SetHasDefer(true)
|
cmd/compile, cmd/link, runtime: make defers low-cost through inline code and extra funcdata
Generate inline code at defer time to save the args of defer calls to unique
(autotmp) stack slots, and generate inline code at exit time to check which defer
calls were made and make the associated function/method/interface calls. We
remember that a particular defer statement was reached by storing in the deferBits
variable (always stored on the stack). At exit time, we check the bits of the
deferBits variable to determine which defer function calls to make (in reverse
order). These low-cost defers are only used for functions where no defers
appear in loops. In addition, we don't do these low-cost defers if there are too
many defer statements or too many exits in a function (to limit code increase).
When a function uses open-coded defers, we produce extra
FUNCDATA_OpenCodedDeferInfo information that specifies the number of defers, and
for each defer, the stack slots where the closure and associated args have been
stored. The funcdata also includes the location of the deferBits variable.
Therefore, for panics, we can use this funcdata to determine exactly which defers
are active, and call the appropriate functions/methods/closures with the correct
arguments for each active defer.
In order to unwind the stack correctly after a recover(), we need to add an extra
code segment to functions with open-coded defers that simply calls deferreturn()
and returns. This segment is not reachable by the normal function, but is returned
to by the runtime during recovery. We set the liveness information of this
deferreturn() to be the same as the liveness at the first function call during the
last defer exit code (so all return values and all stack slots needed by the defer
calls will be live).
I needed to increase the stackguard constant from 880 to 896, because of a small
amount of new code in deferreturn().
The -N flag disables open-coded defers. '-d defer' prints out the kind of defer
being used at each defer statement (heap-allocated, stack-allocated, or
open-coded).
Cost of defer statement [ go test -run NONE -bench BenchmarkDefer$ runtime ]
With normal (stack-allocated) defers only: 35.4 ns/op
With open-coded defers: 5.6 ns/op
Cost of function call alone (remove defer keyword): 4.4 ns/op
Text size increase (including funcdata) for go binary without/with open-coded defers: 0.09%
The average size increase (including funcdata) for only the functions that use
open-coded defers is 1.1%.
The cost of a panic followed by a recover got noticeably slower, since panic
processing now requires a scan of the stack for open-coded defer frames. This scan
is required, even if no frames are using open-coded defers:
Cost of panic and recover [ go test -run NONE -bench BenchmarkPanicRecover runtime ]
Without open-coded defers: 62.0 ns/op
With open-coded defers: 255 ns/op
A CGO Go-to-C-to-Go benchmark got noticeably faster because of open-coded defers:
CGO Go-to-C-to-Go benchmark [cd misc/cgo/test; go test -run NONE -bench BenchmarkCGoCallback ]
Without open-coded defers: 443 ns/op
With open-coded defers: 347 ns/op
Updates #14939 (defer performance)
Updates #34481 (design doc)
Change-Id: I63b1a60d1ebf28126f55ee9fd7ecffe9cb23d1ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/202340
Reviewed-by: Austin Clements <austin@google.com>
2019-06-24 12:59:22 -07:00
|
|
|
Curfn.Func.numDefers++
|
|
|
|
|
if Curfn.Func.numDefers > maxOpenDefers {
|
|
|
|
|
// Don't allow open-coded defers if there are more than
|
|
|
|
|
// 8 defers in the function, since we use a single
|
|
|
|
|
// byte to record active defers.
|
|
|
|
|
Curfn.Func.SetOpenCodedDeferDisallowed(true)
|
|
|
|
|
}
|
|
|
|
|
if n.Esc != EscNever {
|
|
|
|
|
// If n.Esc is not EscNever, then this defer occurs in a loop,
|
|
|
|
|
// so open-coded defers cannot be used in this function.
|
|
|
|
|
Curfn.Func.SetOpenCodedDeferDisallowed(true)
|
|
|
|
|
}
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
fallthrough
|
2018-11-18 08:34:38 -08:00
|
|
|
case OGO:
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Left.Op {
|
2015-04-01 09:38:44 -07:00
|
|
|
case OPRINT, OPRINTN:
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
n.Left = wrapCall(n.Left, &n.Ninit)
|
|
|
|
|
|
|
|
|
|
case ODELETE:
|
|
|
|
|
if mapfast(n.Left.List.First().Type) == mapslow {
|
|
|
|
|
n.Left = wrapCall(n.Left, &n.Ninit)
|
|
|
|
|
} else {
|
|
|
|
|
n.Left = walkexpr(n.Left, &n.Ninit)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OCOPY:
|
2015-10-20 10:00:07 -07:00
|
|
|
n.Left = copyany(n.Left, &n.Ninit, true)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-09-12 07:19:22 +07:00
|
|
|
case OCALLFUNC, OCALLMETH, OCALLINTER:
|
|
|
|
|
if n.Left.Nbody.Len() > 0 {
|
2020-09-08 15:28:43 +07:00
|
|
|
n.Left = wrapCall(n.Left, &n.Ninit)
|
|
|
|
|
} else {
|
|
|
|
|
n.Left = walkexpr(n.Left, &n.Ninit)
|
|
|
|
|
}
|
2020-09-12 07:19:22 +07:00
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
n.Left = walkexpr(n.Left, &n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-02-02 11:53:41 -05:00
|
|
|
case OFOR, OFORUNTIL:
|
2015-05-26 21:30:20 -04:00
|
|
|
if n.Left != nil {
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.Left.Ninit.Slice())
|
2015-05-26 21:30:20 -04:00
|
|
|
init := n.Left.Ninit
|
2016-03-08 15:10:26 -08:00
|
|
|
n.Left.Ninit.Set(nil)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, &init)
|
|
|
|
|
n.Left = addinit(n.Left, init.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right = walkstmt(n.Right)
|
cmd/compile: don't produce a past-the-end pointer in range loops
Currently, range loops over slices and arrays are compiled roughly
like:
for i, x := range s { b }
⇓
for i, _n, _p := 0, len(s), &s[0]; i < _n; i, _p = i+1, _p + unsafe.Sizeof(s[0]) { b }
⇓
i, _n, _p := 0, len(s), &s[0]
goto cond
body:
{ b }
i, _p = i+1, _p + unsafe.Sizeof(s[0])
cond:
if i < _n { goto body } else { goto end }
end:
The problem with this lowering is that _p may temporarily point past
the end of the allocation the moment before the loop terminates. Right
now this isn't a problem because there's never a safe-point during
this brief moment.
We're about to introduce safe-points everywhere, so this bad pointer
is going to be a problem. We could mark the increment as an unsafe
block, but this inhibits reordering opportunities and could result in
infrequent safe-points if the body is short.
Instead, this CL fixes this by changing how we compile range loops to
never produce this past-the-end pointer. It changes the lowering to
roughly:
i, _n, _p := 0, len(s), &s[0]
if i < _n { goto body } else { goto end }
top:
_p += unsafe.Sizeof(s[0])
body:
{ b }
i++
if i < _n { goto top } else { goto end }
end:
Notably, the increment is split into two parts: we increment the index
before checking the condition, but increment the pointer only *after*
the condition check has succeeded.
The implementation builds on the OFORUNTIL construct that was
introduced during the loop preemption experiments, since OFORUNTIL
places the increment and condition after the loop body. To support the
extra "late increment" step, we further define OFORUNTIL's "List"
field to contain the late increment statements. This makes all of this
a relatively small change.
This depends on the improvements to the prove pass in CL 102603. With
the current lowering, bounds-check elimination knows that i < _n in
the body because the body block is dominated by the cond block. In the
new lowering, deriving this fact requires detecting that i < _n on
*both* paths into body and hence is true in body. CL 102603 made prove
able to detect this.
The code size effect of this is minimal. The cmd/go binary on
linux/amd64 increases by 0.17%. Performance-wise, this actually
appears to be a net win, though it's mostly noise:
name old time/op new time/op delta
BinaryTree17-12 2.80s ± 0% 2.61s ± 1% -6.88% (p=0.000 n=20+18)
Fannkuch11-12 2.41s ± 0% 2.42s ± 0% +0.05% (p=0.005 n=20+20)
FmtFprintfEmpty-12 41.6ns ± 5% 41.4ns ± 6% ~ (p=0.765 n=20+19)
FmtFprintfString-12 69.4ns ± 3% 69.3ns ± 1% ~ (p=0.084 n=19+17)
FmtFprintfInt-12 76.1ns ± 1% 77.3ns ± 1% +1.57% (p=0.000 n=19+19)
FmtFprintfIntInt-12 122ns ± 2% 123ns ± 3% +0.95% (p=0.015 n=20+20)
FmtFprintfPrefixedInt-12 153ns ± 2% 151ns ± 3% -1.27% (p=0.013 n=20+20)
FmtFprintfFloat-12 215ns ± 0% 216ns ± 0% +0.47% (p=0.000 n=20+16)
FmtManyArgs-12 486ns ± 1% 498ns ± 0% +2.40% (p=0.000 n=20+17)
GobDecode-12 6.43ms ± 0% 6.50ms ± 0% +1.08% (p=0.000 n=18+19)
GobEncode-12 5.43ms ± 1% 5.47ms ± 0% +0.76% (p=0.000 n=20+20)
Gzip-12 218ms ± 1% 218ms ± 1% ~ (p=0.883 n=20+20)
Gunzip-12 38.8ms ± 0% 38.9ms ± 0% ~ (p=0.644 n=19+19)
HTTPClientServer-12 76.2µs ± 1% 76.4µs ± 2% ~ (p=0.218 n=20+20)
JSONEncode-12 12.2ms ± 0% 12.3ms ± 1% +0.45% (p=0.000 n=19+19)
JSONDecode-12 54.2ms ± 1% 53.3ms ± 0% -1.67% (p=0.000 n=20+20)
Mandelbrot200-12 3.71ms ± 0% 3.71ms ± 0% ~ (p=0.143 n=19+20)
GoParse-12 3.22ms ± 0% 3.19ms ± 1% -0.72% (p=0.000 n=20+20)
RegexpMatchEasy0_32-12 76.7ns ± 1% 75.8ns ± 1% -1.19% (p=0.000 n=20+17)
RegexpMatchEasy0_1K-12 245ns ± 1% 243ns ± 0% -0.72% (p=0.000 n=18+17)
RegexpMatchEasy1_32-12 71.9ns ± 0% 71.7ns ± 1% -0.39% (p=0.006 n=12+18)
RegexpMatchEasy1_1K-12 358ns ± 1% 354ns ± 1% -1.13% (p=0.000 n=20+19)
RegexpMatchMedium_32-12 105ns ± 2% 105ns ± 1% -0.63% (p=0.007 n=19+20)
RegexpMatchMedium_1K-12 31.9µs ± 1% 31.9µs ± 1% ~ (p=1.000 n=17+17)
RegexpMatchHard_32-12 1.51µs ± 1% 1.52µs ± 2% +0.46% (p=0.042 n=18+18)
RegexpMatchHard_1K-12 45.3µs ± 1% 45.5µs ± 2% +0.44% (p=0.029 n=18+19)
Revcomp-12 388ms ± 1% 385ms ± 0% -0.57% (p=0.000 n=19+18)
Template-12 63.0ms ± 1% 63.3ms ± 0% +0.50% (p=0.000 n=19+20)
TimeParse-12 309ns ± 1% 307ns ± 0% -0.62% (p=0.000 n=20+20)
TimeFormat-12 328ns ± 0% 333ns ± 0% +1.35% (p=0.000 n=19+19)
[Geo mean] 47.0µs 46.9µs -0.20%
(https://perf.golang.org/search?q=upload:20180326.1)
For #10958.
For #24543.
Change-Id: Icbd52e711fdbe7938a1fea3e6baca1104b53ac3a
Reviewed-on: https://go-review.googlesource.com/102604
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2018-03-22 12:04:51 -04:00
|
|
|
if n.Op == OFORUNTIL {
|
|
|
|
|
walkstmtlist(n.List.Slice())
|
|
|
|
|
}
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.Nbody.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OIF:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, &n.Ninit)
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.Nbody.Slice())
|
|
|
|
|
walkstmtlist(n.Rlist.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ORETURN:
|
cmd/compile, cmd/link, runtime: make defers low-cost through inline code and extra funcdata
Generate inline code at defer time to save the args of defer calls to unique
(autotmp) stack slots, and generate inline code at exit time to check which defer
calls were made and make the associated function/method/interface calls. We
remember that a particular defer statement was reached by storing in the deferBits
variable (always stored on the stack). At exit time, we check the bits of the
deferBits variable to determine which defer function calls to make (in reverse
order). These low-cost defers are only used for functions where no defers
appear in loops. In addition, we don't do these low-cost defers if there are too
many defer statements or too many exits in a function (to limit code increase).
When a function uses open-coded defers, we produce extra
FUNCDATA_OpenCodedDeferInfo information that specifies the number of defers, and
for each defer, the stack slots where the closure and associated args have been
stored. The funcdata also includes the location of the deferBits variable.
Therefore, for panics, we can use this funcdata to determine exactly which defers
are active, and call the appropriate functions/methods/closures with the correct
arguments for each active defer.
In order to unwind the stack correctly after a recover(), we need to add an extra
code segment to functions with open-coded defers that simply calls deferreturn()
and returns. This segment is not reachable by the normal function, but is returned
to by the runtime during recovery. We set the liveness information of this
deferreturn() to be the same as the liveness at the first function call during the
last defer exit code (so all return values and all stack slots needed by the defer
calls will be live).
I needed to increase the stackguard constant from 880 to 896, because of a small
amount of new code in deferreturn().
The -N flag disables open-coded defers. '-d defer' prints out the kind of defer
being used at each defer statement (heap-allocated, stack-allocated, or
open-coded).
Cost of defer statement [ go test -run NONE -bench BenchmarkDefer$ runtime ]
With normal (stack-allocated) defers only: 35.4 ns/op
With open-coded defers: 5.6 ns/op
Cost of function call alone (remove defer keyword): 4.4 ns/op
Text size increase (including funcdata) for go binary without/with open-coded defers: 0.09%
The average size increase (including funcdata) for only the functions that use
open-coded defers is 1.1%.
The cost of a panic followed by a recover got noticeably slower, since panic
processing now requires a scan of the stack for open-coded defer frames. This scan
is required, even if no frames are using open-coded defers:
Cost of panic and recover [ go test -run NONE -bench BenchmarkPanicRecover runtime ]
Without open-coded defers: 62.0 ns/op
With open-coded defers: 255 ns/op
A CGO Go-to-C-to-Go benchmark got noticeably faster because of open-coded defers:
CGO Go-to-C-to-Go benchmark [cd misc/cgo/test; go test -run NONE -bench BenchmarkCGoCallback ]
Without open-coded defers: 443 ns/op
With open-coded defers: 347 ns/op
Updates #14939 (defer performance)
Updates #34481 (design doc)
Change-Id: I63b1a60d1ebf28126f55ee9fd7ecffe9cb23d1ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/202340
Reviewed-by: Austin Clements <austin@google.com>
2019-06-24 12:59:22 -07:00
|
|
|
Curfn.Func.numReturns++
|
2016-03-08 15:10:26 -08:00
|
|
|
if n.List.Len() == 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2016-04-01 20:11:30 -07:00
|
|
|
if (Curfn.Type.FuncType().Outnamed && n.List.Len() > 1) || paramoutheap(Curfn) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// assign to the function out parameters,
|
|
|
|
|
// so that reorder3 can fix up conflicts
|
2016-03-07 22:54:46 -08:00
|
|
|
var rl []*Node
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range Curfn.Func.Dcl {
|
2017-09-11 21:23:44 +02:00
|
|
|
cl := ln.Class()
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if cl == PAUTO || cl == PAUTOHEAP {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if cl == PPARAMOUT {
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if ln.isParamStackCopy() {
|
2018-11-18 08:34:38 -08:00
|
|
|
ln = walkexpr(typecheck(nod(ODEREF, ln.Name.Param.Heapaddr, nil), ctxExpr), nil)
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
}
|
2016-03-07 22:54:46 -08:00
|
|
|
rl = append(rl, ln)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
if got, want := n.List.Len(), len(rl); got != want {
|
2015-12-14 12:37:26 -08:00
|
|
|
// order should have rewritten multi-value function calls
|
|
|
|
|
// with explicit OAS2FUNC nodes.
|
|
|
|
|
Fatalf("expected %v return arguments, have %v", want, got)
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// move function calls out, to make reorder3's job easier.
|
2016-03-07 22:54:46 -08:00
|
|
|
walkexprlistsafe(n.List.Slice(), &n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-08 10:26:20 -08:00
|
|
|
ll := ascompatee(n.Op, rl, n.List.Slice(), &n.Ninit)
|
2016-03-08 15:10:26 -08:00
|
|
|
n.List.Set(reorder3(ll))
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2018-06-12 14:10:33 -04:00
|
|
|
walkexprlist(n.List.Slice(), &n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
// For each return parameter (lhs), assign the corresponding result (rhs).
|
|
|
|
|
lhs := Curfn.Type.Results()
|
|
|
|
|
rhs := n.List.Slice()
|
|
|
|
|
res := make([]*Node, lhs.NumFields())
|
|
|
|
|
for i, nl := range lhs.FieldSlice() {
|
|
|
|
|
nname := asNode(nl.Nname)
|
|
|
|
|
if nname.isParamHeapCopy() {
|
|
|
|
|
nname = nname.Name.Param.Stackcopy
|
|
|
|
|
}
|
|
|
|
|
a := nod(OAS, nname, rhs[i])
|
|
|
|
|
res[i] = convas(a, &n.Ninit)
|
|
|
|
|
}
|
|
|
|
|
n.List.Set(res)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ORETJMP:
|
|
|
|
|
break
|
|
|
|
|
|
2018-12-04 07:58:18 -08:00
|
|
|
case OINLMARK:
|
|
|
|
|
break
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
case OSELECT:
|
|
|
|
|
walkselect(n)
|
|
|
|
|
|
|
|
|
|
case OSWITCH:
|
|
|
|
|
walkswitch(n)
|
|
|
|
|
|
|
|
|
|
case ORANGE:
|
2017-02-02 11:53:41 -05:00
|
|
|
n = walkrange(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Op == ONAME {
|
2016-08-31 15:22:36 -07:00
|
|
|
Fatalf("walkstmt ended up with name: %+v", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// walk the whole tree of the body of an
|
|
|
|
|
// expression or simple statement.
|
|
|
|
|
// the types expressions are calculated.
|
|
|
|
|
// compile-time constants are evaluated.
|
|
|
|
|
// complex side effects like statements are appended to init
|
2016-03-07 22:54:46 -08:00
|
|
|
func walkexprlist(s []*Node, init *Nodes) {
|
|
|
|
|
for i := range s {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
s[i] = walkexpr(s[i], init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
func walkexprlistsafe(s []*Node, init *Nodes) {
|
|
|
|
|
for i, n := range s {
|
|
|
|
|
s[i] = safeexpr(n, init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
s[i] = walkexpr(s[i], init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
func walkexprlistcheap(s []*Node, init *Nodes) {
|
|
|
|
|
for i, n := range s {
|
|
|
|
|
s[i] = cheapexpr(n, init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
s[i] = walkexpr(s[i], init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-21 17:31:21 +10:00
|
|
|
// convFuncName builds the runtime function name for interface conversion.
|
|
|
|
|
// It also reports whether the function expects the data by address.
|
|
|
|
|
// Not all names are possible. For example, we never generate convE2E or convE2I.
|
|
|
|
|
func convFuncName(from, to *types.Type) (fnname string, needsaddr bool) {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
tkind := to.Tie()
|
|
|
|
|
switch from.Tie() {
|
2016-03-04 22:02:27 +00:00
|
|
|
case 'I':
|
2018-10-29 13:54:24 -07:00
|
|
|
if tkind == 'I' {
|
2018-04-21 17:31:21 +10:00
|
|
|
return "convI2I", false
|
2016-03-04 22:02:27 +00:00
|
|
|
}
|
|
|
|
|
case 'T':
|
2018-10-29 13:54:24 -07:00
|
|
|
switch {
|
|
|
|
|
case from.Size() == 2 && from.Align == 2:
|
|
|
|
|
return "convT16", false
|
2020-08-22 14:07:30 -07:00
|
|
|
case from.Size() == 4 && from.Align == 4 && !from.HasPointers():
|
2018-10-29 13:54:24 -07:00
|
|
|
return "convT32", false
|
2020-08-22 14:07:30 -07:00
|
|
|
case from.Size() == 8 && from.Align == types.Types[TUINT64].Align && !from.HasPointers():
|
2018-10-29 13:54:24 -07:00
|
|
|
return "convT64", false
|
2018-11-06 16:27:58 -08:00
|
|
|
}
|
|
|
|
|
if sc := from.SoleComponent(); sc != nil {
|
|
|
|
|
switch {
|
|
|
|
|
case sc.IsString():
|
|
|
|
|
return "convTstring", false
|
|
|
|
|
case sc.IsSlice():
|
|
|
|
|
return "convTslice", false
|
|
|
|
|
}
|
2018-10-29 13:54:24 -07:00
|
|
|
}
|
|
|
|
|
|
2016-03-04 22:02:27 +00:00
|
|
|
switch tkind {
|
|
|
|
|
case 'E':
|
2020-08-22 14:07:30 -07:00
|
|
|
if !from.HasPointers() {
|
2018-04-21 17:31:21 +10:00
|
|
|
return "convT2Enoptr", true
|
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals
Prior to this CL, all runtime conversions
from a concrete value to an interface went
through one of two runtime calls: convT2E or convT2I.
However, in practice, basic types are very common.
Specializing convT2x for those basic types allows
for a more efficient implementation for those types.
For basic scalars and strings, allocation and copying
can use the same methods as normal code.
For pointer-free types, allocation can occur without
zeroing, and copying can take place without GC calls.
For slices, copying is cheaper and simpler.
This CL adds twelve runtime routines:
convT2E16, convT2I16
convT2E32, convT2I32
convT2E64, convT2I64
convT2Estring, convT2Istring
convT2Eslice, convT2Islice
convT2Enoptr, convT2Inoptr
While compiling make.bash, 93% of all convT2x calls
are now to one of these specialized convT2x call.
Within specialized convT2x routines, it is cheap to check
for a zero value, in a way that it is not in general.
When we detect a zero value there, we return a pointer
to zeroVal, rather than allocating.
name old time/op new time/op delta
ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56)
ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60)
ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57)
ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60)
ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59)
ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57)
ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60)
ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57)
ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58)
ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55)
ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52)
ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59)
Benchmarks on a real program (the compiler):
name old time/op new time/op delta
Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26)
Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26)
GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30)
Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27)
Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28)
GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30)
Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30)
Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30)
XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29)
name old user-ns/op new user-ns/op delta
Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28)
Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27)
GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30)
Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28)
Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29)
GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30)
Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30)
Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30)
XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30)
name old alloc/op new alloc/op delta
Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29)
Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30)
GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30)
Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29)
Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30)
GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28)
Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30)
Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28)
XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30)
name old allocs/op new allocs/op delta
Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30)
Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30)
GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30)
Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29)
Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30)
GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28)
Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30)
Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29)
XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30)
Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f
Reviewed-on: https://go-review.googlesource.com/36476
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
|
|
|
}
|
2018-04-21 17:31:21 +10:00
|
|
|
return "convT2E", true
|
2016-03-04 22:02:27 +00:00
|
|
|
case 'I':
|
2020-08-22 14:07:30 -07:00
|
|
|
if !from.HasPointers() {
|
2018-04-21 17:31:21 +10:00
|
|
|
return "convT2Inoptr", true
|
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals
Prior to this CL, all runtime conversions
from a concrete value to an interface went
through one of two runtime calls: convT2E or convT2I.
However, in practice, basic types are very common.
Specializing convT2x for those basic types allows
for a more efficient implementation for those types.
For basic scalars and strings, allocation and copying
can use the same methods as normal code.
For pointer-free types, allocation can occur without
zeroing, and copying can take place without GC calls.
For slices, copying is cheaper and simpler.
This CL adds twelve runtime routines:
convT2E16, convT2I16
convT2E32, convT2I32
convT2E64, convT2I64
convT2Estring, convT2Istring
convT2Eslice, convT2Islice
convT2Enoptr, convT2Inoptr
While compiling make.bash, 93% of all convT2x calls
are now to one of these specialized convT2x call.
Within specialized convT2x routines, it is cheap to check
for a zero value, in a way that it is not in general.
When we detect a zero value there, we return a pointer
to zeroVal, rather than allocating.
name old time/op new time/op delta
ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56)
ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60)
ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57)
ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60)
ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59)
ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57)
ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60)
ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57)
ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58)
ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55)
ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52)
ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59)
Benchmarks on a real program (the compiler):
name old time/op new time/op delta
Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26)
Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26)
GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30)
Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27)
Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28)
GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30)
Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30)
Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30)
XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29)
name old user-ns/op new user-ns/op delta
Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28)
Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27)
GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30)
Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28)
Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29)
GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30)
Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30)
Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30)
XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30)
name old alloc/op new alloc/op delta
Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29)
Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30)
GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30)
Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29)
Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30)
GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28)
Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30)
Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28)
XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30)
name old allocs/op new allocs/op delta
Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30)
Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30)
GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30)
Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29)
Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30)
GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28)
Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30)
Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29)
XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30)
Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f
Reviewed-on: https://go-review.googlesource.com/36476
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
|
|
|
}
|
2018-04-21 17:31:21 +10:00
|
|
|
return "convT2I", true
|
2016-03-04 22:02:27 +00:00
|
|
|
}
|
|
|
|
|
}
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
Fatalf("unknown conv func %c2%c", from.Tie(), to.Tie())
|
2016-03-04 22:02:27 +00:00
|
|
|
panic("unreachable")
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
// The result of walkexpr MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = walkexpr(n.Left, init)
|
|
|
|
|
func walkexpr(n *Node, init *Nodes) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n == nil {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-04-27 17:18:02 -07:00
|
|
|
// Eagerly checkwidth all expressions for the back end.
|
|
|
|
|
if n.Type != nil && !n.Type.WidthCalculated() {
|
|
|
|
|
switch n.Type.Etype {
|
|
|
|
|
case TBLANK, TNIL, TIDEAL:
|
|
|
|
|
default:
|
|
|
|
|
checkwidth(n.Type)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
if init == &n.Ninit {
|
|
|
|
|
// not okay to use n->ninit when walking n,
|
|
|
|
|
// because we might replace n with some other node
|
|
|
|
|
// and would lose the init list.
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("walkexpr init == &n->ninit")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
if n.Ninit.Len() != 0 {
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.Ninit.Slice())
|
|
|
|
|
init.AppendNodes(&n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
lno := setlineno(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.w > 1 {
|
2018-05-24 13:31:40 -07:00
|
|
|
Dump("before walk expr", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-04-25 18:02:43 -07:00
|
|
|
if n.Typecheck() != 1 {
|
2016-08-31 15:22:36 -07:00
|
|
|
Fatalf("missed typecheck: %+v", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: prevent untyped types from reaching walk
We already require expressions to have already been typechecked before
reaching walk. Moreover, all untyped expressions should have been
converted to their default type by walk.
However, in practice, we've been somewhat sloppy and inconsistent
about ensuring this. In particular, a lot of AST rewrites ended up
leaving untyped bool expressions scattered around. These likely aren't
harmful in practice, but it seems worth cleaning up.
The two most common cases addressed by this CL are:
1) When generating OIF and OFOR nodes, we would often typecheck the
conditional expression, but not apply defaultlit to force it to the
expression's default type.
2) When rewriting string comparisons into more fundamental primitives,
we were simply overwriting r.Type with the desired type, which didn't
propagate the type to nested subexpressions. These are fixed by
utilizing finishcompare, which correctly handles this (and is already
used by other comparison lowering rewrites).
Lastly, walkexpr is extended to assert that it's not called on untyped
expressions.
Fixes #23834.
Change-Id: Icbd29648a293555e4015d3b06a95a24ccbd3f790
Reviewed-on: https://go-review.googlesource.com/98337
Reviewed-by: Robert Griesemer <gri@golang.org>
2018-03-02 15:20:49 -08:00
|
|
|
if n.Type.IsUntyped() {
|
|
|
|
|
Fatalf("expression has untyped type: %+v", n)
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-25 18:14:12 -07:00
|
|
|
if n.Op == ONAME && n.Class() == PAUTOHEAP {
|
2018-11-18 08:34:38 -08:00
|
|
|
nn := nod(ODEREF, n.Name.Param.Heapaddr, nil)
|
|
|
|
|
nn = typecheck(nn, ctxExpr)
|
2016-08-11 09:54:04 -04:00
|
|
|
nn = walkexpr(nn, init)
|
2020-04-16 21:25:15 -07:00
|
|
|
nn.Left.MarkNonNil()
|
2016-08-11 09:54:04 -04:00
|
|
|
return nn
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
}
|
|
|
|
|
|
2015-09-20 23:28:34 +02:00
|
|
|
opswitch:
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
|
|
|
|
Dump("walk", n)
|
2016-09-09 21:08:46 -07:00
|
|
|
Fatalf("walkexpr: switch 1 unknown op %+S", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-04-05 16:43:08 -07:00
|
|
|
case ONONAME, OEMPTY, OGETG, ONEWOBJ:
|
2017-03-24 16:15:10 -07:00
|
|
|
|
2020-11-13 20:38:21 -08:00
|
|
|
case OTYPE, ONAME, OLITERAL, ONIL:
|
2017-03-24 16:15:10 -07:00
|
|
|
// TODO(mdempsky): Just return n; see discussion on CL 38655.
|
2017-03-28 07:12:57 -07:00
|
|
|
// Perhaps refactor to use Node.mayBeShared for these instead.
|
2017-04-13 06:11:36 -07:00
|
|
|
// If these return early, make sure to still call
|
|
|
|
|
// stringsym for constant strings.
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
case ONOT, ONEG, OPLUS, OBITNOT, OREAL, OIMAG, ODOTMETH, ODOTINTER,
|
|
|
|
|
ODEREF, OSPTR, OITAB, OIDATA, OADDR:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: defer lowering OANDNOT until SSA
Currently, "x &^ y" gets rewriten into "x & ^y" during walk. It adds
unnecessary complexity to other parts, which must aware about this.
Instead, we can just implement "&^" in the conversion to SSA, so "&^"
can be handled like other binary operators.
However, this CL does not pass toolstash-check. It seems that implements
"&^" in the conversion to SSA causes registers allocation change.
With the parent:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ "".n(SP), CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ DX, CX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, CX
With this CL:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ "".n(SP), DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ CX, DX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, DX
Change-Id: I80acf8496a91be4804fb7ef3df04c19baae2754c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264660
Trust: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Run-TryBot: Cuong Manh Le <cuong.manhle.vn@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-10-24 10:40:09 +07:00
|
|
|
case OEFACE, OAND, OANDNOT, OSUB, OMUL, OADD, OOR, OXOR, OLSH, ORSH:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2017-03-24 16:22:08 -07:00
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-10 06:58:36 -08:00
|
|
|
case ODOT, ODOTPTR:
|
2015-02-13 14:40:36 -05:00
|
|
|
usefield(n)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-27 14:48:24 -07:00
|
|
|
case ODOTTYPE, ODOTTYPE2:
|
|
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
// Set up interface type addresses for back end.
|
|
|
|
|
n.Right = typename(n.Type)
|
|
|
|
|
if n.Op == ODOTTYPE {
|
|
|
|
|
n.Right.Right = typename(n.Left.Type)
|
|
|
|
|
}
|
2017-03-28 13:52:33 -07:00
|
|
|
if !n.Type.IsInterface() && !n.Left.Type.IsEmptyInterface() {
|
|
|
|
|
n.List.Set1(itabname(n.Type, n.Left.Type))
|
|
|
|
|
}
|
2017-03-27 14:48:24 -07:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OLEN, OCAP:
|
2018-04-24 15:13:08 +02:00
|
|
|
if isRuneCount(n) {
|
|
|
|
|
// Replace len([]rune(string)) with runtime.countrunes(string).
|
|
|
|
|
n = mkcall("countrunes", n.Type, init, conv(n.Left.Left, types.Types[TSTRING]))
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// replace len(*[10]int) with 10.
|
|
|
|
|
// delayed until now to preserve side effects.
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Left.Type
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-30 15:09:25 -07:00
|
|
|
if t.IsPtr() {
|
2016-03-30 10:57:47 -07:00
|
|
|
t = t.Elem()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-30 14:45:47 -07:00
|
|
|
if t.IsArray() {
|
2015-02-13 14:40:36 -05:00
|
|
|
safeexpr(n.Left, init)
|
2018-03-31 16:49:07 -07:00
|
|
|
setintconst(n, t.NumElem())
|
2017-04-25 18:02:43 -07:00
|
|
|
n.SetTypecheck(1)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-02-04 21:20:23 -08:00
|
|
|
case OCOMPLEX:
|
|
|
|
|
// Use results from call expression as arguments for complex.
|
|
|
|
|
if n.Left == nil && n.Right == nil {
|
2016-03-08 15:10:26 -08:00
|
|
|
n.Left = n.List.First()
|
|
|
|
|
n.Right = n.List.Second()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-10-10 16:47:47 -07:00
|
|
|
case OEQ, ONE, OLT, OLE, OGT, OGE:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkcompare(n, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OANDAND, OOROR:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-27 12:34:45 -04:00
|
|
|
// cannot put side effects from n.Right on init,
|
|
|
|
|
// because they cannot run before n.Left is checked.
|
|
|
|
|
// save elsewhere and store on the eventual n.Right.
|
2016-03-07 22:54:46 -08:00
|
|
|
var ll Nodes
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right = walkexpr(n.Right, &ll)
|
|
|
|
|
n.Right = addinit(n.Right, ll.Slice())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OPRINT, OPRINTN:
|
2015-02-13 14:40:36 -05:00
|
|
|
n = walkprint(n, init)
|
|
|
|
|
|
|
|
|
|
case OPANIC:
|
|
|
|
|
n = mkcall("gopanic", nil, init, n.Left)
|
|
|
|
|
|
|
|
|
|
case ORECOVER:
|
2018-03-08 21:00:36 +00:00
|
|
|
n = mkcall("gorecover", n.Type, init, nod(OADDR, nodfp, nil))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-24 16:15:10 -07:00
|
|
|
case OCLOSUREVAR, OCFUNC:
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
case OCALLINTER, OCALLFUNC, OCALLMETH:
|
|
|
|
|
if n.Op == OCALLINTER {
|
|
|
|
|
usemethod(n)
|
2020-09-21 20:44:53 -04:00
|
|
|
markUsedIfaceMethod(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
if n.Op == OCALLFUNC && n.Left.Op == OCLOSURE {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Transform direct call of a closure to call of a normal function.
|
|
|
|
|
// transformclosure already did all preparation work.
|
|
|
|
|
|
2015-07-23 14:17:07 -04:00
|
|
|
// Prepend captured variables to argument list.
|
[dev.regabi] cmd/compile: clean up Node.Func
The original meaning of type Func was "extra fields factored out
of a few cases of type Node having to do with functions",
but those specific cases didn't necessarily have any relation.
A typical declared function is represented by an ODCLFUNC Node
at its declaration and an ONAME node at its uses, and both those
have a .Func field, but they are *different* Funcs.
Similarly, a closure is represented both by an OCLOSURE Node for
the value itself and an ODCLFUNC Node for the underlying function
implementing the closure. Those too have *different* Funcs,
and the Func.Closure field in one points to the other and vice versa.
This has led to no end of confusion over the years.
This CL elevates type Func to be the canonical identifier for
a given Go function.
This looks like a trivial CL but in fact is the result of a lot of
scaffolding and rewriting, discarded once the result was achieved, to
separate out the three different kinds of Func nodes into three
separate fields, limited in use to each specific Node type, to
understand which Func fields are used by which Node types and what the
possible overlaps are. There were a few overlaps, most notably around
closures, which led to more fields being added to type Func to keep
them separate even though there is now a single Func instead of two
different ones for each function.
A future CL can and should change Curfn to be a *Func instead of
a *Node, finally eliminating the confusion about whether Curfn
is an ODCLFUNC node (as it is most of the time) or an ONAME node
(as it is when type-checking an inlined function body).
Although sizeof_test.go makes it look like Func is growing by two
words, there are now half as many Funcs in a running compilation,
so the memory footprint has actually been reduced substantially.
Change-Id: I598bd96c95728093dc769a835d48f2154a406a61
Reviewed-on: https://go-review.googlesource.com/c/go/+/272253
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-11-16 17:00:10 -05:00
|
|
|
n.List.Prepend(n.Left.Func.ClosureEnter.Slice()...)
|
|
|
|
|
n.Left.Func.ClosureEnter.Set(nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Replace OCLOSURE with ONAME/PFUNC.
|
[dev.regabi] cmd/compile: clean up Node.Func
The original meaning of type Func was "extra fields factored out
of a few cases of type Node having to do with functions",
but those specific cases didn't necessarily have any relation.
A typical declared function is represented by an ODCLFUNC Node
at its declaration and an ONAME node at its uses, and both those
have a .Func field, but they are *different* Funcs.
Similarly, a closure is represented both by an OCLOSURE Node for
the value itself and an ODCLFUNC Node for the underlying function
implementing the closure. Those too have *different* Funcs,
and the Func.Closure field in one points to the other and vice versa.
This has led to no end of confusion over the years.
This CL elevates type Func to be the canonical identifier for
a given Go function.
This looks like a trivial CL but in fact is the result of a lot of
scaffolding and rewriting, discarded once the result was achieved, to
separate out the three different kinds of Func nodes into three
separate fields, limited in use to each specific Node type, to
understand which Func fields are used by which Node types and what the
possible overlaps are. There were a few overlaps, most notably around
closures, which led to more fields being added to type Func to keep
them separate even though there is now a single Func instead of two
different ones for each function.
A future CL can and should change Curfn to be a *Func instead of
a *Node, finally eliminating the confusion about whether Curfn
is an ODCLFUNC node (as it is most of the time) or an ONAME node
(as it is when type-checking an inlined function body).
Although sizeof_test.go makes it look like Func is growing by two
words, there are now half as many Funcs in a running compilation,
so the memory footprint has actually been reduced substantially.
Change-Id: I598bd96c95728093dc769a835d48f2154a406a61
Reviewed-on: https://go-review.googlesource.com/c/go/+/272253
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-11-16 17:00:10 -05:00
|
|
|
n.Left = n.Left.Func.Nname
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Update type of OCALLFUNC node.
|
|
|
|
|
// Output arguments had not changed, but their offsets could.
|
2017-05-02 09:16:22 -07:00
|
|
|
if n.Left.Type.NumResults() == 1 {
|
2016-03-13 23:02:38 -07:00
|
|
|
n.Type = n.Left.Type.Results().Field(0).Type
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-03-08 16:31:28 -08:00
|
|
|
n.Type = n.Left.Type.Results()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
walkCall(n, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
case OAS, OASOP:
|
2016-03-07 22:54:46 -08:00
|
|
|
init.AppendNodes(&n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: avoid mapaccess at m[k]=append(m[k]..
Currently rvalue m[k] is transformed during walk into:
tmp1 := *mapaccess(m, k)
tmp2 := append(tmp1, ...)
*mapassign(m, k) = tmp2
However, this is suboptimal, as we could instead produce just:
tmp := mapassign(m, k)
*tmp := append(*tmp, ...)
Optimization is possible only if during Order it may tell that m[k] is
exactly the same at left and right part of assignment. It doesn't work:
1) m[f(k)] = append(m[f(k)], ...)
2) sink, m[k] = sink, append(m[k]...)
3) m[k] = append(..., m[k],...)
Benchmark:
name old time/op new time/op delta
MapAppendAssign/Int32/256-8 33.5ns ± 3% 22.4ns ±10% -33.24% (p=0.000 n=16+18)
MapAppendAssign/Int32/65536-8 68.2ns ± 6% 48.5ns ±29% -28.90% (p=0.000 n=20+20)
MapAppendAssign/Int64/256-8 34.3ns ± 4% 23.3ns ± 5% -32.23% (p=0.000 n=17+18)
MapAppendAssign/Int64/65536-8 65.9ns ± 7% 61.2ns ±19% -7.06% (p=0.002 n=18+20)
MapAppendAssign/Str/256-8 116ns ±12% 79ns ±16% -31.70% (p=0.000 n=20+19)
MapAppendAssign/Str/65536-8 134ns ±15% 111ns ±45% -16.95% (p=0.000 n=19+20)
name old alloc/op new alloc/op delta
MapAppendAssign/Int32/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=19+18)
MapAppendAssign/Int32/65536-8 27.0B ± 0% 20.7B ±30% -23.33% (p=0.000 n=20+20)
MapAppendAssign/Int64/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=20+17)
MapAppendAssign/Int64/65536-8 27.0B ± 0% 27.0B ± 0% ~ (all equal)
MapAppendAssign/Str/256-8 94.0B ± 0% 78.0B ± 0% -17.02% (p=0.000 n=20+16)
MapAppendAssign/Str/65536-8 54.0B ± 0% 54.0B ± 0% ~ (all equal)
Fixes #24364
Updates #5147
Change-Id: Id257d052b75b9a445b4885dc571bf06ce6f6b409
Reviewed-on: https://go-review.googlesource.com/100838
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-03-14 01:16:43 -07:00
|
|
|
// Recognize m[k] = append(m[k], ...) so we can reuse
|
|
|
|
|
// the mapassign call.
|
|
|
|
|
mapAppend := n.Left.Op == OINDEXMAP && n.Right.Op == OAPPEND
|
|
|
|
|
if mapAppend && !samesafeexpr(n.Left, n.Right.List.First()) {
|
|
|
|
|
Fatalf("not same expressions: %v != %v", n.Left, n.Right.List.First())
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Left = safeexpr(n.Left, init)
|
|
|
|
|
|
cmd/compile: avoid mapaccess at m[k]=append(m[k]..
Currently rvalue m[k] is transformed during walk into:
tmp1 := *mapaccess(m, k)
tmp2 := append(tmp1, ...)
*mapassign(m, k) = tmp2
However, this is suboptimal, as we could instead produce just:
tmp := mapassign(m, k)
*tmp := append(*tmp, ...)
Optimization is possible only if during Order it may tell that m[k] is
exactly the same at left and right part of assignment. It doesn't work:
1) m[f(k)] = append(m[f(k)], ...)
2) sink, m[k] = sink, append(m[k]...)
3) m[k] = append(..., m[k],...)
Benchmark:
name old time/op new time/op delta
MapAppendAssign/Int32/256-8 33.5ns ± 3% 22.4ns ±10% -33.24% (p=0.000 n=16+18)
MapAppendAssign/Int32/65536-8 68.2ns ± 6% 48.5ns ±29% -28.90% (p=0.000 n=20+20)
MapAppendAssign/Int64/256-8 34.3ns ± 4% 23.3ns ± 5% -32.23% (p=0.000 n=17+18)
MapAppendAssign/Int64/65536-8 65.9ns ± 7% 61.2ns ±19% -7.06% (p=0.002 n=18+20)
MapAppendAssign/Str/256-8 116ns ±12% 79ns ±16% -31.70% (p=0.000 n=20+19)
MapAppendAssign/Str/65536-8 134ns ±15% 111ns ±45% -16.95% (p=0.000 n=19+20)
name old alloc/op new alloc/op delta
MapAppendAssign/Int32/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=19+18)
MapAppendAssign/Int32/65536-8 27.0B ± 0% 20.7B ±30% -23.33% (p=0.000 n=20+20)
MapAppendAssign/Int64/256-8 47.0B ± 0% 46.0B ± 0% -2.13% (p=0.000 n=20+17)
MapAppendAssign/Int64/65536-8 27.0B ± 0% 27.0B ± 0% ~ (all equal)
MapAppendAssign/Str/256-8 94.0B ± 0% 78.0B ± 0% -17.02% (p=0.000 n=20+16)
MapAppendAssign/Str/65536-8 54.0B ± 0% 54.0B ± 0% ~ (all equal)
Fixes #24364
Updates #5147
Change-Id: Id257d052b75b9a445b4885dc571bf06ce6f6b409
Reviewed-on: https://go-review.googlesource.com/100838
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-03-14 01:16:43 -07:00
|
|
|
if mapAppend {
|
|
|
|
|
n.Right.List.SetFirst(n.Left)
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
if n.Op == OASOP {
|
|
|
|
|
// Rewrite x op= y into x = x op y.
|
|
|
|
|
n.Right = nod(n.SubOp(), n.Left, n.Right)
|
2018-11-18 08:34:38 -08:00
|
|
|
n.Right = typecheck(n.Right, ctxExpr)
|
cmd/compile: avoid extra mapaccess in "m[k] op= r"
Currently, order desugars map assignment operations like
m[k] op= r
into
m[k] = m[k] op r
which in turn is transformed during walk into:
tmp := *mapaccess(m, k)
tmp = tmp op r
*mapassign(m, k) = tmp
However, this is suboptimal, as we could instead produce just:
*mapassign(m, k) op= r
One complication though is if "r == 0", then "m[k] /= r" and "m[k] %=
r" will panic, and they need to do so *before* calling mapassign,
otherwise we may insert a new zero-value element into the map.
It would be spec compliant to just emit the "r != 0" check before
calling mapassign (see #23735), but currently these checks aren't
generated until SSA construction. For now, it's simpler to continue
desugaring /= and %= into two map indexing operations.
Fixes #23661.
Change-Id: I46e3739d9adef10e92b46fdd78b88d5aabe68952
Reviewed-on: https://go-review.googlesource.com/91557
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
2018-02-01 21:33:56 -08:00
|
|
|
|
|
|
|
|
n.Op = OAS
|
|
|
|
|
n.ResetAux()
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if oaslit(n, init) {
|
2015-09-20 23:28:34 +02:00
|
|
|
break
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-10-18 10:26:28 -04:00
|
|
|
if n.Right == nil {
|
|
|
|
|
// TODO(austin): Check all "implicit zeroing"
|
2015-09-20 23:28:34 +02:00
|
|
|
break
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-04-08 13:39:10 +01:00
|
|
|
if !instrumenting && isZero(n.Right) {
|
2016-12-19 08:19:50 -08:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
switch n.Right.Op {
|
|
|
|
|
default:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ORECV:
|
2015-03-27 12:34:45 -04:00
|
|
|
// x = <-c; n.Left is x, n.Right.Left is c.
|
2019-11-05 23:56:35 +07:00
|
|
|
// order.stmt made sure x is addressable.
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right.Left = walkexpr(n.Right.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
n1 := nod(OADDR, n.Left, nil)
|
2015-02-23 16:07:24 -05:00
|
|
|
r := n.Right.Left // the channel
|
2017-03-18 15:55:41 +00:00
|
|
|
n = mkcall1(chanfn("chanrecv1", 2, r.Type), nil, init, r, n1)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkexpr(n, init)
|
2015-09-20 23:28:34 +02:00
|
|
|
break opswitch
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
|
|
|
|
|
case OAPPEND:
|
|
|
|
|
// x = append(...)
|
|
|
|
|
r := n.Right
|
2017-02-27 19:56:38 +02:00
|
|
|
if r.Type.Elem().NotInHeap() {
|
2020-08-27 14:05:52 -07:00
|
|
|
yyerror("%v can't be allocated in Go; it is incomplete (or unallocatable)", r.Type.Elem())
|
cmd/compile: add go:notinheap type pragma
This adds a //go:notinheap pragma for declarations of types that must
not be heap allocated. We ensure these rules by disallowing new(T),
make([]T), append([]T), or implicit allocation of T, by disallowing
conversions to notinheap types, and by propagating notinheap to any
struct or array that contains notinheap elements.
The utility of this pragma is that we can eliminate write barriers for
writes to pointers to go:notinheap types, since the write barrier is
guaranteed to be a no-op. This will let us mark several scheduler and
memory allocator structures as go:notinheap, which will let us
disallow write barriers in the scheduler and memory allocator much
more thoroughly and also eliminate some problematic hybrid write
barriers.
This also makes go:nowritebarrierrec and go:yeswritebarrierrec much
more powerful. Currently we use go:nowritebarrier all over the place,
but it's almost never what you actually want: when write barriers are
illegal, they're typically illegal for a whole dynamic scope. Partly
this is because go:nowritebarrier has been around longer, but it's
also because go:nowritebarrierrec couldn't be used in situations that
had no-op write barriers or where some nested scope did allow write
barriers. go:notinheap eliminates many no-op write barriers and
go:yeswritebarrierrec makes it possible to opt back in to write
barriers, so these two changes will let us use go:nowritebarrierrec
far more liberally.
This updates #13386, which is about controlling pointers from non-GC'd
memory to GC'd memory. That would require some additional pragma (or
pragmas), but could build on this pragma.
Change-Id: I6314f8f4181535dd166887c9ec239977b54940bd
Reviewed-on: https://go-review.googlesource.com/30939
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-10-11 22:53:27 -04:00
|
|
|
}
|
2018-04-26 18:30:11 +02:00
|
|
|
switch {
|
|
|
|
|
case isAppendOfMake(r):
|
|
|
|
|
// x = append(y, make([]T, y)...)
|
|
|
|
|
r = extendslice(r, init)
|
2018-11-18 08:34:38 -08:00
|
|
|
case r.IsDDD():
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
r = appendslice(r, init) // also works for append(slice, string).
|
2018-04-26 18:30:11 +02:00
|
|
|
default:
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
r = walkappend(r, init, n)
|
|
|
|
|
}
|
|
|
|
|
n.Right = r
|
|
|
|
|
if r.Op == OAPPEND {
|
|
|
|
|
// Left in place for back end.
|
|
|
|
|
// Do not add a new write barrier.
|
2017-03-27 14:48:24 -07:00
|
|
|
// Set up address of type for back end.
|
|
|
|
|
r.Left = typename(r.Type.Elem())
|
2015-09-20 23:28:34 +02:00
|
|
|
break opswitch
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
}
|
|
|
|
|
// Otherwise, lowered for race detector.
|
|
|
|
|
// Treat as ordinary assignment.
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Left != nil && n.Right != nil {
|
2016-03-26 08:17:43 -07:00
|
|
|
n = convas(n, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OAS2:
|
2016-03-07 22:54:46 -08:00
|
|
|
init.AppendNodes(&n.Ninit)
|
|
|
|
|
walkexprlistsafe(n.List.Slice(), init)
|
|
|
|
|
walkexprlistsafe(n.Rlist.Slice(), init)
|
2016-03-08 10:26:20 -08:00
|
|
|
ll := ascompatee(OAS, n.List.Slice(), n.Rlist.Slice(), init)
|
2015-02-13 14:40:36 -05:00
|
|
|
ll = reorder3(ll)
|
|
|
|
|
n = liststmt(ll)
|
|
|
|
|
|
2016-10-06 15:43:47 -04:00
|
|
|
// a,b,... = fn()
|
2015-02-13 14:40:36 -05:00
|
|
|
case OAS2FUNC:
|
2016-03-07 22:54:46 -08:00
|
|
|
init.AppendNodes(&n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-09-27 15:59:03 -07:00
|
|
|
r := n.Right
|
2016-03-07 22:54:46 -08:00
|
|
|
walkexprlistsafe(n.List.Slice(), init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = walkexpr(r, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-06 15:43:47 -04:00
|
|
|
if isIntrinsicCall(r) {
|
2019-09-27 15:59:03 -07:00
|
|
|
n.Right = r
|
2016-10-06 15:43:47 -04:00
|
|
|
break
|
|
|
|
|
}
|
2016-10-27 09:23:48 -07:00
|
|
|
init.Append(r)
|
2016-10-06 15:43:47 -04:00
|
|
|
|
2017-08-09 16:13:09 +09:00
|
|
|
ll := ascompatet(n.List, r.Type)
|
2016-10-27 09:23:48 -07:00
|
|
|
n = liststmt(ll)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-24 21:38:58 +02:00
|
|
|
// x, y = <-c
|
2019-10-12 02:43:29 +07:00
|
|
|
// order.stmt made sure x is addressable or blank.
|
2015-02-13 14:40:36 -05:00
|
|
|
case OAS2RECV:
|
2016-03-07 22:54:46 -08:00
|
|
|
init.AppendNodes(&n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-09-27 15:59:03 -07:00
|
|
|
r := n.Right
|
2016-03-07 22:54:46 -08:00
|
|
|
walkexprlistsafe(n.List.Slice(), init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r.Left = walkexpr(r.Left, init)
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 *Node
|
2018-04-08 13:39:10 +01:00
|
|
|
if n.List.First().isBlank() {
|
2015-02-13 14:40:36 -05:00
|
|
|
n1 = nodnil()
|
|
|
|
|
} else {
|
2016-09-16 11:00:54 +10:00
|
|
|
n1 = nod(OADDR, n.List.First(), nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
fn := chanfn("chanrecv2", 2, r.Left.Type)
|
2016-08-26 10:50:12 -07:00
|
|
|
ok := n.List.Second()
|
2019-10-12 02:43:29 +07:00
|
|
|
call := mkcall1(fn, types.Types[TBOOL], init, r.Left, n1)
|
2016-09-16 11:00:54 +10:00
|
|
|
n = nod(OAS, ok, call)
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxStmt)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-24 16:22:08 -07:00
|
|
|
// a,b = m[i]
|
2015-02-13 14:40:36 -05:00
|
|
|
case OAS2MAPR:
|
2016-03-07 22:54:46 -08:00
|
|
|
init.AppendNodes(&n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-09-27 15:59:03 -07:00
|
|
|
r := n.Right
|
2016-03-07 22:54:46 -08:00
|
|
|
walkexprlistsafe(n.List.Slice(), init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r.Left = walkexpr(r.Left, init)
|
|
|
|
|
r.Right = walkexpr(r.Right, init)
|
2015-02-23 16:07:24 -05:00
|
|
|
t := r.Left.Type
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-03-14 11:11:28 -07:00
|
|
|
fast := mapfast(t)
|
2015-02-23 16:07:24 -05:00
|
|
|
var key *Node
|
2017-03-14 11:11:28 -07:00
|
|
|
if fast != mapslow {
|
2015-02-13 14:40:36 -05:00
|
|
|
// fast versions take key by value
|
|
|
|
|
key = r.Right
|
|
|
|
|
} else {
|
|
|
|
|
// standard version takes key by reference
|
2019-11-05 23:56:35 +07:00
|
|
|
// order.expr made sure key is addressable.
|
2016-09-16 11:00:54 +10:00
|
|
|
key = nod(OADDR, r.Right, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// from:
|
|
|
|
|
// a,b = m[i]
|
|
|
|
|
// to:
|
|
|
|
|
// var,b = mapaccess2*(t, m, i)
|
|
|
|
|
// a = *var
|
2016-03-08 15:10:26 -08:00
|
|
|
a := n.List.First()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-07-23 23:27:28 +05:30
|
|
|
if w := t.Elem().Width; w <= zeroValSize {
|
2017-03-14 11:11:28 -07:00
|
|
|
fn := mapfn(mapaccess2[fast], t)
|
2016-04-19 08:31:04 -07:00
|
|
|
r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key)
|
|
|
|
|
} else {
|
|
|
|
|
fn := mapfn("mapaccess2_fat", t)
|
|
|
|
|
z := zeroaddr(w)
|
|
|
|
|
r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key, z)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// mapaccess2* returns a typed bool, but due to spec changes,
|
|
|
|
|
// the boolean result of i.(T) is now untyped so we make it the
|
|
|
|
|
// same type as the variable on the lhs.
|
2018-04-08 13:39:10 +01:00
|
|
|
if ok := n.List.Second(); !ok.isBlank() && ok.Type.IsBoolean() {
|
2016-08-26 10:50:12 -07:00
|
|
|
r.Type.Field(1).Type = ok.Type
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2019-09-27 15:59:03 -07:00
|
|
|
n.Right = r
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Op = OAS2FUNC
|
|
|
|
|
|
|
|
|
|
// don't generate a = *var if a is _
|
2018-04-08 13:39:10 +01:00
|
|
|
if !a.isBlank() {
|
2018-04-24 13:53:35 -07:00
|
|
|
var_ := temp(types.NewPtr(t.Elem()))
|
2017-04-25 18:02:43 -07:00
|
|
|
var_.SetTypecheck(1)
|
2020-04-16 21:25:15 -07:00
|
|
|
var_.MarkNonNil() // mapaccess always returns a non-nil pointer
|
2017-02-19 15:57:58 +01:00
|
|
|
n.List.SetFirst(var_)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkexpr(n, init)
|
2016-03-07 22:54:46 -08:00
|
|
|
init.Append(n)
|
2018-11-18 08:34:38 -08:00
|
|
|
n = nod(OAS, a, nod(ODEREF, var_, nil))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxStmt)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkexpr(n, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ODELETE:
|
2016-03-07 22:54:46 -08:00
|
|
|
init.AppendNodes(&n.Ninit)
|
2016-03-08 15:10:26 -08:00
|
|
|
map_ := n.List.First()
|
|
|
|
|
key := n.List.Second()
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
map_ = walkexpr(map_, init)
|
|
|
|
|
key = walkexpr(key, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
t := map_.Type
|
2017-03-14 11:11:28 -07:00
|
|
|
fast := mapfast(t)
|
|
|
|
|
if fast == mapslow {
|
2019-11-05 23:56:35 +07:00
|
|
|
// order.stmt made sure key is addressable.
|
2017-03-14 11:11:28 -07:00
|
|
|
key = nod(OADDR, key, nil)
|
|
|
|
|
}
|
|
|
|
|
n = mkcall1(mapfndel(mapdelete[fast], t), nil, init, typename(t), map_, key)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OAS2DOTTYPE:
|
2016-03-07 22:54:46 -08:00
|
|
|
walkexprlistsafe(n.List.Slice(), init)
|
2019-09-27 15:59:03 -07:00
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OCONVIFACE:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-05 15:42:14 -08:00
|
|
|
fromType := n.Left.Type
|
|
|
|
|
toType := n.Type
|
|
|
|
|
|
2020-09-20 23:29:20 -04:00
|
|
|
if !fromType.IsInterface() && !Curfn.Func.Nname.isBlank() { // skip unnamed functions (func _())
|
|
|
|
|
markTypeUsedInInterface(fromType, Curfn.Func.lsym)
|
[dev.link] cmd/compile, cmd/link: remove dead methods if type is not used in interface
Currently, a method of a reachable type is live if it matches a
method of a reachable interface. In fact, we only need to retain
the method if the type is actually converted to an interface. If
the type is never converted to an interface, there is no way to
call the method through an interface method call (but the type
descriptor could still be used, e.g. in calling
runtime.newobject).
A type can be used in an interface in two ways:
- directly converted to interface. (Any interface counts, as it
is possible to convert one interface to another.)
- obtained by reflection from a related type (e.g. obtaining an
interface of T from []T).
For the former, we let the compiler emit a marker on the type
descriptor symbol when it is converted to an interface. In the
linker, we only need to check methods of marked types.
For the latter, when the linker visits a marked type, it needs to
visit all its "child" types as marked (i.e. potentially could be
converted to interface).
This reduces binary size:
cmd/compile 18792016 18706096 (-0.5%)
cmd/go 14120572 13398948 (-5.1%)
Change-Id: I4465c7eeabf575f4dc84017214c610fa05ae31fd
Reviewed-on: https://go-review.googlesource.com/c/go/+/237298
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
2020-06-08 18:38:59 -04:00
|
|
|
}
|
|
|
|
|
|
2018-11-05 15:42:14 -08:00
|
|
|
// typeword generates the type word of the interface value.
|
|
|
|
|
typeword := func() *Node {
|
|
|
|
|
if toType.IsEmptyInterface() {
|
|
|
|
|
return typename(fromType)
|
2016-03-18 16:20:20 -07:00
|
|
|
}
|
2018-11-05 15:42:14 -08:00
|
|
|
return itabname(fromType, toType)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Optimize convT2E or convT2I as a two-word copy when T is pointer-shaped.
|
|
|
|
|
if isdirectiface(fromType) {
|
|
|
|
|
l := nod(OEFACE, typeword(), n.Left)
|
|
|
|
|
l.Type = toType
|
2017-04-25 18:02:43 -07:00
|
|
|
l.SetTypecheck(n.Typecheck())
|
2015-02-13 14:40:36 -05:00
|
|
|
n = l
|
2015-09-20 23:28:34 +02:00
|
|
|
break
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: convert constants to interfaces without allocating
The order pass is responsible for ensuring that
values passed to runtime functions, including
convT2E/convT2I, are addressable.
Prior to this CL, this was always accomplished
by creating a temp, which frequently escaped to
the heap, causing allocations, perhaps most
notably in code like:
fmt.Println(1, 2, 3) // allocates three times
None of the runtime routines modify the contents
of the pointers they receive, so in the case of
constants, instead of creating a temp value,
we can create a static value.
(Marking the static value as read-only provides
protection against accidental attempts by the runtime
to modify the constant data.)
This improves code generation for code like:
panic("abc")
c <- 2 // c is a chan int
which can now simply refer to "abc" and 2,
rather than going by way of a temporary.
It also allows us to optimize convT2E/convT2I,
by recognizing static readonly values
and directly constructing the interface.
This CL adds ~0.5% to binary size, despite
decreasing the size of many functions,
because it also adds many static symbols.
This binary size regression could be recovered in
future (but currently unplanned) work.
There is a lot of content-duplication in these
symbols; this statement generates six new symbols,
three containing an int 1 and three containing
a pointer to the string "a":
fmt.Println(1, 1, 1, "a", "a", "a")
These symbols could be made content-addressable.
Furthermore, these symbols are small, so the
alignment and naming overhead is large.
As with the go.strings section, these symbols
could be hidden and have their alignment reduced.
The changes to test/live.go make it impossible
(at least with current optimization techniques)
to place the values being passed to the runtime
in static symbols, preserving autotmp creation.
Fixes #18704
Benchmarks from fmt and go-kit's logging package:
github.com/go-kit/kit/log
name old time/op new time/op delta
JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10)
JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10)
Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9)
OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10)
TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9)
TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10)
LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9)
LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10)
NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10)
NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10)
ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10)
ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10)
JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10)
Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10)
OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10)
TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10)
TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10)
ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10)
JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10)
Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10)
ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
fmt
name old time/op new time/op delta
SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7)
SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10)
SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10)
SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10)
SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10)
SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9)
SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8)
SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10)
SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10)
SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10)
SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10)
SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9)
SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10)
SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10)
SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10)
SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10)
ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10)
FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10)
FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10)
FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10)
ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8)
ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10)
ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9)
name old alloc/op new alloc/op delta
SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfEmpty-8 0.00B 0.00B ~ (all equal)
SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10)
SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10)
SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10)
SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10)
SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10)
SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal)
SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal)
SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal)
ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10)
FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal)
ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10)
ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal)
name old allocs/op new allocs/op delta
SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfEmpty-8 0.00 0.00 ~ (all equal)
SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal)
SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal)
ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00 0.00 ~ (all equal)
ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal)
ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5
Reviewed-on: https://go-review.googlesource.com/35554
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
|
|
|
|
2020-03-03 21:03:40 +00:00
|
|
|
if staticuint64s == nil {
|
|
|
|
|
staticuint64s = newname(Runtimepkg.Lookup("staticuint64s"))
|
|
|
|
|
staticuint64s.SetClass(PEXTERN)
|
|
|
|
|
// The actual type is [256]uint64, but we use [256*8]uint8 so we can address
|
|
|
|
|
// individual bytes.
|
|
|
|
|
staticuint64s.Type = types.NewArray(types.Types[TUINT8], 256*8)
|
2017-03-30 13:19:18 -07:00
|
|
|
zerobase = newname(Runtimepkg.Lookup("zerobase"))
|
2017-04-25 18:14:12 -07:00
|
|
|
zerobase.SetClass(PEXTERN)
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
zerobase.Type = types.Types[TUINTPTR]
|
2017-01-21 19:52:09 -08:00
|
|
|
}
|
|
|
|
|
|
2017-01-22 10:17:12 -08:00
|
|
|
// Optimize convT2{E,I} for many cases in which T is not pointer-shaped,
|
|
|
|
|
// by using an existing addressable value identical to n.Left
|
|
|
|
|
// or creating one on the stack.
|
cmd/compile: convert constants to interfaces without allocating
The order pass is responsible for ensuring that
values passed to runtime functions, including
convT2E/convT2I, are addressable.
Prior to this CL, this was always accomplished
by creating a temp, which frequently escaped to
the heap, causing allocations, perhaps most
notably in code like:
fmt.Println(1, 2, 3) // allocates three times
None of the runtime routines modify the contents
of the pointers they receive, so in the case of
constants, instead of creating a temp value,
we can create a static value.
(Marking the static value as read-only provides
protection against accidental attempts by the runtime
to modify the constant data.)
This improves code generation for code like:
panic("abc")
c <- 2 // c is a chan int
which can now simply refer to "abc" and 2,
rather than going by way of a temporary.
It also allows us to optimize convT2E/convT2I,
by recognizing static readonly values
and directly constructing the interface.
This CL adds ~0.5% to binary size, despite
decreasing the size of many functions,
because it also adds many static symbols.
This binary size regression could be recovered in
future (but currently unplanned) work.
There is a lot of content-duplication in these
symbols; this statement generates six new symbols,
three containing an int 1 and three containing
a pointer to the string "a":
fmt.Println(1, 1, 1, "a", "a", "a")
These symbols could be made content-addressable.
Furthermore, these symbols are small, so the
alignment and naming overhead is large.
As with the go.strings section, these symbols
could be hidden and have their alignment reduced.
The changes to test/live.go make it impossible
(at least with current optimization techniques)
to place the values being passed to the runtime
in static symbols, preserving autotmp creation.
Fixes #18704
Benchmarks from fmt and go-kit's logging package:
github.com/go-kit/kit/log
name old time/op new time/op delta
JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10)
JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10)
Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9)
OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10)
TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9)
TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10)
LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9)
LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10)
NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10)
NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10)
ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10)
ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10)
JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10)
Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10)
OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10)
TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10)
TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10)
ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10)
JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10)
Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10)
ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
fmt
name old time/op new time/op delta
SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7)
SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10)
SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10)
SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10)
SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10)
SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9)
SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8)
SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10)
SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10)
SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10)
SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10)
SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9)
SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10)
SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10)
SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10)
SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10)
ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10)
FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10)
FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10)
FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10)
ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8)
ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10)
ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9)
name old alloc/op new alloc/op delta
SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfEmpty-8 0.00B 0.00B ~ (all equal)
SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10)
SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10)
SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10)
SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10)
SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10)
SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal)
SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal)
SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal)
ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10)
FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal)
ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10)
ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal)
name old allocs/op new allocs/op delta
SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfEmpty-8 0.00 0.00 ~ (all equal)
SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal)
SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal)
ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00 0.00 ~ (all equal)
ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal)
ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5
Reviewed-on: https://go-review.googlesource.com/35554
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
|
|
|
var value *Node
|
|
|
|
|
switch {
|
2018-11-05 15:42:14 -08:00
|
|
|
case fromType.Size() == 0:
|
2017-01-22 10:17:12 -08:00
|
|
|
// n.Left is zero-sized. Use zerobase.
|
2017-02-22 21:19:29 -08:00
|
|
|
cheapexpr(n.Left, init) // Evaluate n.Left for side-effects. See issue 19246.
|
2017-01-22 10:17:12 -08:00
|
|
|
value = zerobase
|
2018-11-05 15:42:14 -08:00
|
|
|
case fromType.IsBoolean() || (fromType.Size() == 1 && fromType.IsInteger()):
|
2020-03-03 21:03:40 +00:00
|
|
|
// n.Left is a bool/byte. Use staticuint64s[n.Left * 8] on little-endian
|
|
|
|
|
// and staticuint64s[n.Left * 8 + 7] on big-endian.
|
2017-02-24 17:21:54 -05:00
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2020-03-03 21:03:40 +00:00
|
|
|
// byteindex widens n.Left so that the multiplication doesn't overflow.
|
|
|
|
|
index := nod(OLSH, byteindex(n.Left), nodintconst(3))
|
|
|
|
|
if thearch.LinkArch.ByteOrder == binary.BigEndian {
|
|
|
|
|
index = nod(OADD, index, nodintconst(7))
|
|
|
|
|
}
|
|
|
|
|
value = nod(OINDEX, staticuint64s, index)
|
2017-02-27 19:56:38 +02:00
|
|
|
value.SetBounded(true)
|
2017-04-25 18:14:12 -07:00
|
|
|
case n.Left.Class() == PEXTERN && n.Left.Name != nil && n.Left.Name.Readonly():
|
2017-01-22 10:17:12 -08:00
|
|
|
// n.Left is a readonly global; use it directly.
|
|
|
|
|
value = n.Left
|
2018-11-05 15:42:14 -08:00
|
|
|
case !fromType.IsInterface() && n.Esc == EscNone && fromType.Width <= 1024:
|
2017-01-22 10:17:12 -08:00
|
|
|
// n.Left does not escape. Use a stack temporary initialized to n.Left.
|
2018-11-05 15:42:14 -08:00
|
|
|
value = temp(fromType)
|
2018-11-18 08:34:38 -08:00
|
|
|
init.Append(typecheck(nod(OAS, value, n.Left), ctxStmt))
|
cmd/compile: convert constants to interfaces without allocating
The order pass is responsible for ensuring that
values passed to runtime functions, including
convT2E/convT2I, are addressable.
Prior to this CL, this was always accomplished
by creating a temp, which frequently escaped to
the heap, causing allocations, perhaps most
notably in code like:
fmt.Println(1, 2, 3) // allocates three times
None of the runtime routines modify the contents
of the pointers they receive, so in the case of
constants, instead of creating a temp value,
we can create a static value.
(Marking the static value as read-only provides
protection against accidental attempts by the runtime
to modify the constant data.)
This improves code generation for code like:
panic("abc")
c <- 2 // c is a chan int
which can now simply refer to "abc" and 2,
rather than going by way of a temporary.
It also allows us to optimize convT2E/convT2I,
by recognizing static readonly values
and directly constructing the interface.
This CL adds ~0.5% to binary size, despite
decreasing the size of many functions,
because it also adds many static symbols.
This binary size regression could be recovered in
future (but currently unplanned) work.
There is a lot of content-duplication in these
symbols; this statement generates six new symbols,
three containing an int 1 and three containing
a pointer to the string "a":
fmt.Println(1, 1, 1, "a", "a", "a")
These symbols could be made content-addressable.
Furthermore, these symbols are small, so the
alignment and naming overhead is large.
As with the go.strings section, these symbols
could be hidden and have their alignment reduced.
The changes to test/live.go make it impossible
(at least with current optimization techniques)
to place the values being passed to the runtime
in static symbols, preserving autotmp creation.
Fixes #18704
Benchmarks from fmt and go-kit's logging package:
github.com/go-kit/kit/log
name old time/op new time/op delta
JSONLoggerSimple-8 1.91µs ± 2% 2.11µs ±22% ~ (p=1.000 n=9+10)
JSONLoggerContextual-8 2.60µs ± 6% 2.43µs ± 2% -6.29% (p=0.000 n=9+10)
Discard-8 101ns ± 2% 34ns ±14% -66.33% (p=0.000 n=10+9)
OneWith-8 161ns ± 1% 102ns ±16% -36.78% (p=0.000 n=10+10)
TwoWith-8 175ns ± 3% 106ns ± 7% -39.36% (p=0.000 n=10+9)
TenWith-8 293ns ± 3% 227ns ±15% -22.44% (p=0.000 n=9+10)
LogfmtLoggerSimple-8 704ns ± 2% 608ns ± 2% -13.65% (p=0.000 n=10+9)
LogfmtLoggerContextual-8 962ns ± 1% 860ns ±17% -10.57% (p=0.003 n=9+10)
NopLoggerSimple-8 188ns ± 1% 120ns ± 1% -36.39% (p=0.000 n=9+10)
NopLoggerContextual-8 379ns ± 1% 243ns ± 0% -35.77% (p=0.000 n=9+10)
ValueBindingTimestamp-8 577ns ± 1% 499ns ± 1% -13.51% (p=0.000 n=10+10)
ValueBindingCaller-8 898ns ± 2% 844ns ± 2% -6.00% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
JSONLoggerSimple-8 904B ± 0% 872B ± 0% -3.54% (p=0.000 n=10+10)
JSONLoggerContextual-8 1.20kB ± 0% 1.14kB ± 0% -5.33% (p=0.000 n=10+10)
Discard-8 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10)
OneWith-8 96.0B ± 0% 64.0B ± 0% -33.33% (p=0.000 n=10+10)
TwoWith-8 160B ± 0% 128B ± 0% -20.00% (p=0.000 n=10+10)
TenWith-8 672B ± 0% 640B ± 0% -4.76% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
NopLoggerSimple-8 128B ± 0% 96B ± 0% -25.00% (p=0.000 n=10+10)
NopLoggerContextual-8 304B ± 0% 240B ± 0% -21.05% (p=0.000 n=10+10)
ValueBindingTimestamp-8 159B ± 0% 127B ± 0% -20.13% (p=0.000 n=10+10)
ValueBindingCaller-8 112B ± 0% 80B ± 0% -28.57% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
JSONLoggerSimple-8 19.0 ± 0% 17.0 ± 0% -10.53% (p=0.000 n=10+10)
JSONLoggerContextual-8 25.0 ± 0% 21.0 ± 0% -16.00% (p=0.000 n=10+10)
Discard-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
OneWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TwoWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
TenWith-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
LogfmtLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
LogfmtLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
NopLoggerSimple-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
NopLoggerContextual-8 7.00 ± 0% 3.00 ± 0% -57.14% (p=0.000 n=10+10)
ValueBindingTimestamp-8 5.00 ± 0% 3.00 ± 0% -40.00% (p=0.000 n=10+10)
ValueBindingCaller-8 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
fmt
name old time/op new time/op delta
SprintfPadding-8 88.9ns ± 3% 79.1ns ± 1% -11.09% (p=0.000 n=10+7)
SprintfEmpty-8 12.6ns ± 3% 12.8ns ± 3% ~ (p=0.136 n=10+10)
SprintfString-8 38.7ns ± 5% 26.9ns ± 6% -30.65% (p=0.000 n=10+10)
SprintfTruncateString-8 56.7ns ± 2% 47.0ns ± 3% -17.05% (p=0.000 n=10+10)
SprintfQuoteString-8 164ns ± 2% 153ns ± 2% -7.01% (p=0.000 n=10+10)
SprintfInt-8 38.9ns ±15% 26.5ns ± 2% -31.93% (p=0.000 n=10+9)
SprintfIntInt-8 60.3ns ± 9% 38.2ns ± 1% -36.67% (p=0.000 n=10+8)
SprintfPrefixedInt-8 58.6ns ±13% 51.2ns ±11% -12.66% (p=0.001 n=10+10)
SprintfFloat-8 71.4ns ± 3% 64.2ns ± 3% -10.08% (p=0.000 n=8+10)
SprintfComplex-8 175ns ± 3% 159ns ± 2% -9.03% (p=0.000 n=10+10)
SprintfBoolean-8 33.5ns ± 4% 25.7ns ± 5% -23.28% (p=0.000 n=10+10)
SprintfHexString-8 65.3ns ± 3% 51.7ns ± 5% -20.86% (p=0.000 n=10+9)
SprintfHexBytes-8 67.2ns ± 5% 67.9ns ± 4% ~ (p=0.383 n=10+10)
SprintfBytes-8 129ns ± 7% 124ns ± 7% ~ (p=0.074 n=9+10)
SprintfStringer-8 127ns ± 4% 126ns ± 8% ~ (p=0.506 n=9+10)
SprintfStructure-8 357ns ± 3% 359ns ± 3% ~ (p=0.469 n=10+10)
ManyArgs-8 203ns ± 6% 126ns ± 3% -37.94% (p=0.000 n=10+10)
FprintInt-8 119ns ±10% 74ns ± 3% -37.54% (p=0.000 n=10+10)
FprintfBytes-8 122ns ± 4% 120ns ± 3% ~ (p=0.124 n=10+10)
FprintIntNoAlloc-8 78.2ns ± 5% 74.1ns ± 3% -5.28% (p=0.000 n=10+10)
ScanInts-8 349µs ± 1% 349µs ± 0% ~ (p=0.606 n=9+8)
ScanRecursiveInt-8 43.8ms ± 7% 40.1ms ± 2% -8.42% (p=0.000 n=10+10)
ScanRecursiveIntReaderWrapper-8 43.5ms ± 4% 40.4ms ± 2% -7.16% (p=0.000 n=10+9)
name old alloc/op new alloc/op delta
SprintfPadding-8 24.0B ± 0% 16.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfEmpty-8 0.00B 0.00B ~ (all equal)
SprintfString-8 21.0B ± 0% 5.0B ± 0% -76.19% (p=0.000 n=10+10)
SprintfTruncateString-8 32.0B ± 0% 16.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfInt-8 16.0B ± 0% 1.0B ± 0% -93.75% (p=0.000 n=10+10)
SprintfIntInt-8 24.0B ± 0% 3.0B ± 0% -87.50% (p=0.000 n=10+10)
SprintfPrefixedInt-8 72.0B ± 0% 64.0B ± 0% -11.11% (p=0.000 n=10+10)
SprintfFloat-8 16.0B ± 0% 8.0B ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 48.0B ± 0% 32.0B ± 0% -33.33% (p=0.000 n=10+10)
SprintfBoolean-8 8.00B ± 0% 4.00B ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 96.0B ± 0% 80.0B ± 0% -16.67% (p=0.000 n=10+10)
SprintfHexBytes-8 112B ± 0% 112B ± 0% ~ (all equal)
SprintfBytes-8 96.0B ± 0% 96.0B ± 0% ~ (all equal)
SprintfStringer-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
SprintfStructure-8 256B ± 0% 256B ± 0% ~ (all equal)
ManyArgs-8 80.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
FprintInt-8 8.00B ± 0% 0.00B -100.00% (p=0.000 n=10+10)
FprintfBytes-8 32.0B ± 0% 32.0B ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00B 0.00B ~ (all equal)
ScanInts-8 15.2kB ± 0% 15.2kB ± 0% ~ (p=0.248 n=9+10)
ScanRecursiveInt-8 21.6kB ± 0% 21.6kB ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 21.7kB ± 0% 21.7kB ± 0% ~ (all equal)
name old allocs/op new allocs/op delta
SprintfPadding-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfEmpty-8 0.00 0.00 ~ (all equal)
SprintfString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfTruncateString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfQuoteString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfIntInt-8 3.00 ± 0% 1.00 ± 0% -66.67% (p=0.000 n=10+10)
SprintfPrefixedInt-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfFloat-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfComplex-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfBoolean-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexString-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10)
SprintfHexBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfBytes-8 2.00 ± 0% 2.00 ± 0% ~ (all equal)
SprintfStringer-8 4.00 ± 0% 4.00 ± 0% ~ (all equal)
SprintfStructure-8 7.00 ± 0% 7.00 ± 0% ~ (all equal)
ManyArgs-8 8.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintInt-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
FprintfBytes-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
FprintIntNoAlloc-8 0.00 0.00 ~ (all equal)
ScanInts-8 1.60k ± 0% 1.60k ± 0% ~ (all equal)
ScanRecursiveInt-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
ScanRecursiveIntReaderWrapper-8 1.71k ± 0% 1.71k ± 0% ~ (all equal)
Change-Id: I7ba72a25fea4140a0ba40a9f443103ed87cc69b5
Reviewed-on: https://go-review.googlesource.com/35554
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-01-21 13:41:06 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if value != nil {
|
2017-01-22 10:17:12 -08:00
|
|
|
// Value is identical to n.Left.
|
|
|
|
|
// Construct the interface directly: {type/itab, &value}.
|
2018-11-18 08:34:38 -08:00
|
|
|
l := nod(OEFACE, typeword(), typecheck(nod(OADDR, value, nil), ctxExpr))
|
2018-11-05 15:42:14 -08:00
|
|
|
l.Type = toType
|
2017-04-25 18:02:43 -07:00
|
|
|
l.SetTypecheck(n.Typecheck())
|
2016-09-17 15:04:36 -07:00
|
|
|
n = l
|
|
|
|
|
break
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-25 23:34:35 +00:00
|
|
|
// Implement interface to empty interface conversion.
|
|
|
|
|
// tmp = i.itab
|
|
|
|
|
// if tmp != nil {
|
|
|
|
|
// tmp = tmp.type
|
|
|
|
|
// }
|
|
|
|
|
// e = iface{tmp, i.data}
|
2018-11-05 15:42:14 -08:00
|
|
|
if toType.IsEmptyInterface() && fromType.IsInterface() && !fromType.IsEmptyInterface() {
|
2016-10-25 23:34:35 +00:00
|
|
|
// Evaluate the input interface.
|
2018-11-05 15:42:14 -08:00
|
|
|
c := temp(fromType)
|
2016-10-25 23:34:35 +00:00
|
|
|
init.Append(nod(OAS, c, n.Left))
|
|
|
|
|
|
|
|
|
|
// Get the itab out of the interface.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
tmp := temp(types.NewPtr(types.Types[TUINT8]))
|
2018-11-18 08:34:38 -08:00
|
|
|
init.Append(nod(OAS, tmp, typecheck(nod(OITAB, c, nil), ctxExpr)))
|
2016-10-25 23:34:35 +00:00
|
|
|
|
|
|
|
|
// Get the type out of the itab.
|
2018-11-18 08:34:38 -08:00
|
|
|
nif := nod(OIF, typecheck(nod(ONE, tmp, nodnil()), ctxExpr), nil)
|
2016-10-25 23:34:35 +00:00
|
|
|
nif.Nbody.Set1(nod(OAS, tmp, itabType(tmp)))
|
|
|
|
|
init.Append(nif)
|
|
|
|
|
|
|
|
|
|
// Build the result.
|
2020-04-13 23:29:17 -07:00
|
|
|
e := nod(OEFACE, tmp, ifaceData(n.Pos, c, types.NewPtr(types.Types[TUINT8])))
|
2018-11-05 15:42:14 -08:00
|
|
|
e.Type = toType // assign type manually, typecheck doesn't understand OEFACE.
|
2017-04-25 18:02:43 -07:00
|
|
|
e.SetTypecheck(1)
|
2016-10-25 23:34:35 +00:00
|
|
|
n = e
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-05 15:42:14 -08:00
|
|
|
fnname, needsaddr := convFuncName(fromType, toType)
|
2018-10-29 13:54:24 -07:00
|
|
|
|
2018-11-05 15:42:14 -08:00
|
|
|
if !needsaddr && !fromType.IsInterface() {
|
2018-10-29 13:54:24 -07:00
|
|
|
// Use a specialized conversion routine that only returns a data pointer.
|
|
|
|
|
// ptr = convT2X(val)
|
|
|
|
|
// e = iface{typ/tab, ptr}
|
|
|
|
|
fn := syslook(fnname)
|
2018-11-05 15:42:14 -08:00
|
|
|
dowidth(fromType)
|
|
|
|
|
fn = substArgTypes(fn, fromType)
|
2018-10-29 13:54:24 -07:00
|
|
|
dowidth(fn.Type)
|
|
|
|
|
call := nod(OCALL, fn, nil)
|
|
|
|
|
call.List.Set1(n.Left)
|
2018-11-18 08:34:38 -08:00
|
|
|
call = typecheck(call, ctxExpr)
|
2018-10-29 13:54:24 -07:00
|
|
|
call = walkexpr(call, init)
|
|
|
|
|
call = safeexpr(call, init)
|
2018-11-05 15:42:14 -08:00
|
|
|
e := nod(OEFACE, typeword(), call)
|
|
|
|
|
e.Type = toType
|
2018-10-29 13:54:24 -07:00
|
|
|
e.SetTypecheck(1)
|
|
|
|
|
n = e
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-05 15:42:14 -08:00
|
|
|
var tab *Node
|
|
|
|
|
if fromType.IsInterface() {
|
|
|
|
|
// convI2I
|
|
|
|
|
tab = typename(toType)
|
2016-03-18 17:21:33 -07:00
|
|
|
} else {
|
2018-11-05 15:42:14 -08:00
|
|
|
// convT2x
|
|
|
|
|
tab = typeword()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-04-21 17:31:21 +10:00
|
|
|
v := n.Left
|
|
|
|
|
if needsaddr {
|
|
|
|
|
// Types of large or unknown size are passed by reference.
|
|
|
|
|
// Orderexpr arranged for n.Left to be a temporary for all
|
|
|
|
|
// the conversions it could see. Comparison of an interface
|
2015-02-13 14:40:36 -05:00
|
|
|
// with a non-interface, especially in a switch on interface value
|
2019-11-05 23:56:35 +07:00
|
|
|
// with non-interface cases, is not visible to order.stmt, so we
|
2015-02-13 14:40:36 -05:00
|
|
|
// have to fall back on allocating a temp here.
|
2018-04-21 17:31:21 +10:00
|
|
|
if !islvalue(v) {
|
|
|
|
|
v = copyexpr(v, v.Type, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-04-21 17:31:21 +10:00
|
|
|
v = nod(OADDR, v, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-11-05 15:42:14 -08:00
|
|
|
dowidth(fromType)
|
2018-04-21 17:31:21 +10:00
|
|
|
fn := syslook(fnname)
|
2018-11-05 15:42:14 -08:00
|
|
|
fn = substArgTypes(fn, fromType, toType)
|
2015-02-13 14:40:36 -05:00
|
|
|
dowidth(fn.Type)
|
2016-09-16 11:00:54 +10:00
|
|
|
n = nod(OCALL, fn, nil)
|
2018-11-05 15:42:14 -08:00
|
|
|
n.List.Set2(tab, v)
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxExpr)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = walkexpr(n, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OCONV, OCONVNOP:
|
2018-08-20 21:31:49 +01:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2019-10-17 16:31:19 -07:00
|
|
|
if n.Op == OCONVNOP && checkPtr(Curfn, 1) {
|
2020-09-09 12:06:18 +07:00
|
|
|
if n.Type.IsPtr() && n.Left.Type.IsUnsafePtr() { // unsafe.Pointer to *T
|
2019-10-17 14:29:16 -07:00
|
|
|
n = walkCheckPtrAlignment(n, init, nil)
|
2019-02-12 19:40:42 -08:00
|
|
|
break
|
|
|
|
|
}
|
2020-09-09 12:09:26 +07:00
|
|
|
if n.Type.IsUnsafePtr() && n.Left.Type.IsUintptr() { // uintptr to unsafe.Pointer
|
2019-02-12 19:40:42 -08:00
|
|
|
n = walkCheckPtrArithmetic(n, init)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-08-20 21:31:49 +01:00
|
|
|
param, result := rtconvfn(n.Left.Type, n.Type)
|
|
|
|
|
if param == Txxx {
|
2018-04-29 22:17:23 +09:00
|
|
|
break
|
2017-11-10 18:08:48 +01:00
|
|
|
}
|
2018-08-20 21:31:49 +01:00
|
|
|
fn := basicnames[param] + "to" + basicnames[result]
|
|
|
|
|
n = conv(mkcall(fn, types.Types[result], init, conv(n.Left, types.Types[param])), n.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case ODIV, OMOD:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// rewrite complex div into function call.
|
2015-09-24 23:21:18 +02:00
|
|
|
et := n.Left.Type.Etype
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 00:33:29 +10:00
|
|
|
if isComplex[et] && n.Op == ODIV {
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Type
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = mkcall("complex128div", types.Types[TCOMPLEX128], init, conv(n.Left, types.Types[TCOMPLEX128]), conv(n.Right, types.Types[TCOMPLEX128]))
|
2015-02-13 14:40:36 -05:00
|
|
|
n = conv(n, t)
|
2015-09-20 23:28:34 +02:00
|
|
|
break
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Nothing to do for float divisions.
|
2016-09-16 00:33:29 +10:00
|
|
|
if isFloat[et] {
|
2015-09-20 23:28:34 +02:00
|
|
|
break
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-02-13 16:00:09 -08:00
|
|
|
// rewrite 64-bit div and mod on 32-bit architectures.
|
|
|
|
|
// TODO: Remove this code once we can introduce
|
|
|
|
|
// runtime calls late in SSA processing.
|
|
|
|
|
if Widthreg < 8 && (et == TINT64 || et == TUINT64) {
|
|
|
|
|
if n.Right.Op == OLITERAL {
|
2020-10-25 11:52:29 +01:00
|
|
|
// Leave div/mod by constant powers of 2 or small 16-bit constants.
|
2017-02-13 16:00:09 -08:00
|
|
|
// The SSA backend will handle those.
|
|
|
|
|
switch et {
|
|
|
|
|
case TINT64:
|
2020-10-12 15:02:59 +02:00
|
|
|
c := n.Right.Int64Val()
|
2017-02-13 16:00:09 -08:00
|
|
|
if c < 0 {
|
|
|
|
|
c = -c
|
|
|
|
|
}
|
|
|
|
|
if c != 0 && c&(c-1) == 0 {
|
|
|
|
|
break opswitch
|
|
|
|
|
}
|
|
|
|
|
case TUINT64:
|
2020-10-12 15:02:59 +02:00
|
|
|
c := uint64(n.Right.Int64Val())
|
2020-10-25 11:52:29 +01:00
|
|
|
if c < 1<<16 {
|
|
|
|
|
break opswitch
|
|
|
|
|
}
|
2017-02-13 16:00:09 -08:00
|
|
|
if c != 0 && c&(c-1) == 0 {
|
|
|
|
|
break opswitch
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-03-06 12:02:24 -08:00
|
|
|
var fn string
|
2015-02-13 14:40:36 -05:00
|
|
|
if et == TINT64 {
|
2015-03-06 12:02:24 -08:00
|
|
|
fn = "int64"
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2015-03-06 12:02:24 -08:00
|
|
|
fn = "uint64"
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if n.Op == ODIV {
|
2015-03-06 12:02:24 -08:00
|
|
|
fn += "div"
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2015-03-06 12:02:24 -08:00
|
|
|
fn += "mod"
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = mkcall(fn, n.Type, init, conv(n.Left, types.Types[et]), conv(n.Right, types.Types[et]))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OINDEX:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// save the original node for bounds checking elision.
|
|
|
|
|
// If it was a ODIV/OMOD walk might rewrite it.
|
2015-02-23 16:07:24 -05:00
|
|
|
r := n.Right
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// if range of type cannot exceed static array bound,
|
|
|
|
|
// disable bounds check.
|
2017-02-27 19:56:38 +02:00
|
|
|
if n.Bounded() {
|
2015-09-20 23:28:34 +02:00
|
|
|
break
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Left.Type
|
2016-03-30 15:09:25 -07:00
|
|
|
if t != nil && t.IsPtr() {
|
2016-03-30 10:57:47 -07:00
|
|
|
t = t.Elem()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-30 14:45:47 -07:00
|
|
|
if t.IsArray() {
|
2017-02-27 19:56:38 +02:00
|
|
|
n.SetBounded(bounded(r, t.NumElem()))
|
2020-11-23 13:42:43 -08:00
|
|
|
if Debug.m != 0 && n.Bounded() && !Isconst(n.Right, constant.Int) {
|
2015-02-13 14:40:36 -05:00
|
|
|
Warn("index bounds check elided")
|
|
|
|
|
}
|
2017-02-27 19:56:38 +02:00
|
|
|
if smallintconst(n.Right) && !n.Bounded() {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("index out of bounds")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2020-11-23 13:42:43 -08:00
|
|
|
} else if Isconst(n.Left, constant.String) {
|
2020-10-12 15:02:59 +02:00
|
|
|
n.SetBounded(bounded(r, int64(len(n.Left.StringVal()))))
|
2020-11-23 13:42:43 -08:00
|
|
|
if Debug.m != 0 && n.Bounded() && !Isconst(n.Right, constant.Int) {
|
2015-02-13 14:40:36 -05:00
|
|
|
Warn("index bounds check elided")
|
|
|
|
|
}
|
2017-02-27 19:56:38 +02:00
|
|
|
if smallintconst(n.Right) && !n.Bounded() {
|
2015-10-10 15:24:34 -04:00
|
|
|
yyerror("index out of bounds")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(n.Right, constant.Int) {
|
|
|
|
|
if n.Right.Val().U.(*Mpint).CmpInt64(0) < 0 || doesoverflow(n.Right.Val(), types.Types[TINT]) {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("index out of bounds")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OINDEXMAP:
|
2016-10-11 08:36:38 -07:00
|
|
|
// Replace m[k] with *map{access1,assign}(maptype, m, &k)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
n.Right = walkexpr(n.Right, init)
|
2016-10-11 08:36:38 -07:00
|
|
|
map_ := n.Left
|
|
|
|
|
key := n.Right
|
|
|
|
|
t := map_.Type
|
2018-03-08 04:18:18 -08:00
|
|
|
if n.IndexMapLValue() {
|
2016-10-11 08:36:38 -07:00
|
|
|
// This m[k] expression is on the left-hand side of an assignment.
|
2017-03-14 11:11:28 -07:00
|
|
|
fast := mapfast(t)
|
|
|
|
|
if fast == mapslow {
|
2017-03-12 14:47:59 -07:00
|
|
|
// standard version takes key by reference.
|
2019-11-05 23:56:35 +07:00
|
|
|
// order.expr made sure key is addressable.
|
2017-03-12 14:47:59 -07:00
|
|
|
key = nod(OADDR, key, nil)
|
|
|
|
|
}
|
2017-03-14 11:11:28 -07:00
|
|
|
n = mkcall1(mapfn(mapassign[fast], t), nil, init, typename(t), map_, key)
|
2016-10-11 08:36:38 -07:00
|
|
|
} else {
|
|
|
|
|
// m[k] is not the target of an assignment.
|
2017-03-14 11:11:28 -07:00
|
|
|
fast := mapfast(t)
|
|
|
|
|
if fast == mapslow {
|
2016-10-11 08:36:38 -07:00
|
|
|
// standard version takes key by reference.
|
2019-11-05 23:56:35 +07:00
|
|
|
// order.expr made sure key is addressable.
|
2016-10-11 08:36:38 -07:00
|
|
|
key = nod(OADDR, key, nil)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-07-23 23:27:28 +05:30
|
|
|
if w := t.Elem().Width; w <= zeroValSize {
|
2018-04-24 13:53:35 -07:00
|
|
|
n = mkcall1(mapfn(mapaccess1[fast], t), types.NewPtr(t.Elem()), init, typename(t), map_, key)
|
2016-10-11 08:36:38 -07:00
|
|
|
} else {
|
|
|
|
|
z := zeroaddr(w)
|
2018-04-24 13:53:35 -07:00
|
|
|
n = mkcall1(mapfn("mapaccess1_fat", t), types.NewPtr(t.Elem()), init, typename(t), map_, key, z)
|
2016-10-11 08:36:38 -07:00
|
|
|
}
|
2016-04-19 08:31:04 -07:00
|
|
|
}
|
2018-04-24 13:53:35 -07:00
|
|
|
n.Type = types.NewPtr(t.Elem())
|
2020-04-16 21:25:15 -07:00
|
|
|
n.MarkNonNil() // mapaccess1* and mapassign always return non-nil pointers.
|
2018-11-18 08:34:38 -08:00
|
|
|
n = nod(ODEREF, n, nil)
|
2018-04-24 13:53:35 -07:00
|
|
|
n.Type = t.Elem()
|
2017-04-25 18:02:43 -07:00
|
|
|
n.SetTypecheck(1)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ORECV:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("walkexpr ORECV") // should see inside OAS only
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-10-14 22:28:58 +02:00
|
|
|
case OSLICEHEADER:
|
|
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
n.List.SetFirst(walkexpr(n.List.First(), init))
|
|
|
|
|
n.List.SetSecond(walkexpr(n.List.Second(), init))
|
|
|
|
|
|
2016-04-21 11:55:33 -07:00
|
|
|
case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
|
2020-09-09 12:06:18 +07:00
|
|
|
checkSlice := checkPtr(Curfn, 1) && n.Op == OSLICE3ARR && n.Left.Op == OCONVNOP && n.Left.Left.Type.IsUnsafePtr()
|
2019-10-17 14:29:16 -07:00
|
|
|
if checkSlice {
|
|
|
|
|
n.Left.Left = walkexpr(n.Left.Left, init)
|
|
|
|
|
} else {
|
|
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
}
|
2016-04-21 11:55:33 -07:00
|
|
|
low, high, max := n.SliceBounds()
|
|
|
|
|
low = walkexpr(low, init)
|
2018-04-08 13:39:10 +01:00
|
|
|
if low != nil && isZero(low) {
|
2016-04-21 11:55:33 -07:00
|
|
|
// Reduce x[0:j] to x[:j] and x[0:j:k] to x[:j:k].
|
|
|
|
|
low = nil
|
|
|
|
|
}
|
|
|
|
|
high = walkexpr(high, init)
|
|
|
|
|
max = walkexpr(max, init)
|
|
|
|
|
n.SetSliceBounds(low, high, max)
|
2019-10-17 14:29:16 -07:00
|
|
|
if checkSlice {
|
|
|
|
|
n.Left = walkCheckPtrAlignment(n.Left, init, max)
|
|
|
|
|
}
|
2016-04-21 11:55:33 -07:00
|
|
|
if n.Op.IsSlice3() {
|
|
|
|
|
if max != nil && max.Op == OCAP && samesafeexpr(n.Left, max.Left) {
|
|
|
|
|
// Reduce x[i:j:cap(x)] to x[i:j].
|
|
|
|
|
if n.Op == OSLICE3 {
|
|
|
|
|
n.Op = OSLICE
|
|
|
|
|
} else {
|
|
|
|
|
n.Op = OSLICEARR
|
|
|
|
|
}
|
|
|
|
|
n = reduceSlice(n)
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
2016-04-21 11:55:33 -07:00
|
|
|
} else {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
n = reduceSlice(n)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ONEW:
|
2020-08-21 20:20:12 -07:00
|
|
|
if n.Type.Elem().NotInHeap() {
|
2020-08-27 14:05:52 -07:00
|
|
|
yyerror("%v can't be allocated in Go; it is incomplete (or unallocatable)", n.Type.Elem())
|
2020-08-21 20:20:12 -07:00
|
|
|
}
|
2015-05-20 15:16:34 -04:00
|
|
|
if n.Esc == EscNone {
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
if n.Type.Elem().Width >= maxImplicitStackVarSize {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("large ONEW with EscNone: %v", n)
|
2015-05-20 15:16:34 -04:00
|
|
|
}
|
2016-03-30 10:57:47 -07:00
|
|
|
r := temp(n.Type.Elem())
|
2016-09-16 11:00:54 +10:00
|
|
|
r = nod(OAS, r, nil) // zero temp
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxStmt)
|
2016-03-07 22:54:46 -08:00
|
|
|
init.Append(r)
|
2016-09-16 11:00:54 +10:00
|
|
|
r = nod(OADDR, r.Left, nil)
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxExpr)
|
2015-02-13 14:40:36 -05:00
|
|
|
n = r
|
|
|
|
|
} else {
|
2016-03-30 10:57:47 -07:00
|
|
|
n = callnew(n.Type.Elem())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OADDSTR:
|
|
|
|
|
n = addstr(n, init)
|
|
|
|
|
|
|
|
|
|
case OAPPEND:
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
// order should make sure we only see OAS(node, OAPPEND), which we handle above.
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("append outside assignment")
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OCOPY:
|
2016-09-21 09:44:40 -07:00
|
|
|
n = copyany(n, init, instrumenting && !compiling_runtime)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// cannot use chanfn - closechan takes any, not chan any
|
|
|
|
|
case OCLOSE:
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook("closechan")
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
fn = substArgTypes(fn, n.Left.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
n = mkcall1(fn, nil, init, n.Left)
|
|
|
|
|
|
|
|
|
|
case OMAKECHAN:
|
2017-08-13 20:03:02 +02:00
|
|
|
// When size fits into int, use makechan instead of
|
|
|
|
|
// makechan64, which is faster and shorter on 32 bit platforms.
|
|
|
|
|
size := n.Left
|
|
|
|
|
fnname := "makechan64"
|
|
|
|
|
argtype := types.Types[TINT64]
|
|
|
|
|
|
|
|
|
|
// Type checking guarantees that TIDEAL size is positive and fits in an int.
|
|
|
|
|
// The case of size overflow when converting TUINT or TUINTPTR to TINT
|
|
|
|
|
// will be handled by the negative range checks in makechan during runtime.
|
2020-11-23 13:42:43 -08:00
|
|
|
if size.Type.IsKind(TIDEAL) || size.Type.Size() <= types.Types[TUINT].Size() {
|
2017-08-13 20:03:02 +02:00
|
|
|
fnname = "makechan"
|
|
|
|
|
argtype = types.Types[TINT]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
n = mkcall1(chanfn(fnname, 1, n.Type), n.Type, init, typename(n.Type), conv(size, argtype))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OMAKEMAP:
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Type
|
2017-08-16 23:36:58 +02:00
|
|
|
hmapType := hmap(t)
|
2017-08-31 01:00:39 +02:00
|
|
|
hint := n.Left
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-08-16 23:36:58 +02:00
|
|
|
// var h *hmap
|
|
|
|
|
var h *Node
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Esc == EscNone {
|
2017-08-31 01:00:39 +02:00
|
|
|
// Allocate hmap on stack.
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-08-16 23:36:58 +02:00
|
|
|
// var hv hmap
|
|
|
|
|
hv := temp(hmapType)
|
|
|
|
|
zero := nod(OAS, hv, nil)
|
2018-11-18 08:34:38 -08:00
|
|
|
zero = typecheck(zero, ctxStmt)
|
2017-08-16 23:36:58 +02:00
|
|
|
init.Append(zero)
|
|
|
|
|
// h = &hv
|
|
|
|
|
h = nod(OADDR, hv, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-08-31 01:00:39 +02:00
|
|
|
// Allocate one bucket pointed to by hmap.buckets on stack if hint
|
|
|
|
|
// is not larger than BUCKETSIZE. In case hint is larger than
|
|
|
|
|
// BUCKETSIZE runtime.makemap will allocate the buckets on the heap.
|
2019-04-22 13:37:08 -07:00
|
|
|
// Maximum key and elem size is 128 bytes, larger objects
|
2015-02-13 14:40:36 -05:00
|
|
|
// are stored with an indirection. So max bucket size is 2048+eps.
|
2020-11-23 13:42:43 -08:00
|
|
|
if !Isconst(hint, constant.Int) ||
|
2018-09-18 01:13:04 +03:00
|
|
|
hint.Val().U.(*Mpint).CmpInt64(BUCKETSIZE) <= 0 {
|
2020-04-08 03:08:21 +07:00
|
|
|
|
|
|
|
|
// In case hint is larger than BUCKETSIZE runtime.makemap
|
|
|
|
|
// will allocate the buckets on the heap, see #20184
|
|
|
|
|
//
|
|
|
|
|
// if hint <= BUCKETSIZE {
|
|
|
|
|
// var bv bmap
|
|
|
|
|
// b = &bv
|
|
|
|
|
// h.buckets = b
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
nif := nod(OIF, nod(OLE, hint, nodintconst(BUCKETSIZE)), nil)
|
|
|
|
|
nif.SetLikely(true)
|
|
|
|
|
|
2017-08-31 01:00:39 +02:00
|
|
|
// var bv bmap
|
|
|
|
|
bv := temp(bmap(t))
|
|
|
|
|
zero = nod(OAS, bv, nil)
|
2020-04-08 03:08:21 +07:00
|
|
|
nif.Nbody.Append(zero)
|
2017-08-31 01:00:39 +02:00
|
|
|
|
|
|
|
|
// b = &bv
|
|
|
|
|
b := nod(OADDR, bv, nil)
|
|
|
|
|
|
|
|
|
|
// h.buckets = b
|
|
|
|
|
bsym := hmapType.Field(5).Sym // hmap.buckets see reflect.go:hmap
|
|
|
|
|
na := nod(OAS, nodSym(ODOT, h, bsym), b)
|
2020-04-08 03:08:21 +07:00
|
|
|
nif.Nbody.Append(na)
|
|
|
|
|
|
|
|
|
|
nif = typecheck(nif, ctxStmt)
|
|
|
|
|
nif = walkstmt(nif)
|
|
|
|
|
init.Append(nif)
|
2017-08-31 01:00:39 +02:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(hint, constant.Int) && hint.Val().U.(*Mpint).CmpInt64(BUCKETSIZE) <= 0 {
|
2017-09-02 18:46:59 +02:00
|
|
|
// Handling make(map[any]any) and
|
|
|
|
|
// make(map[any]any, hint) where hint <= BUCKETSIZE
|
|
|
|
|
// special allows for faster map initialization and
|
|
|
|
|
// improves binary size by using calls with fewer arguments.
|
|
|
|
|
// For hint <= BUCKETSIZE overLoadFactor(hint, 0) is false
|
|
|
|
|
// and no buckets will be allocated by makemap. Therefore,
|
|
|
|
|
// no buckets need to be allocated in this code path.
|
|
|
|
|
if n.Esc == EscNone {
|
|
|
|
|
// Only need to initialize h.hash0 since
|
|
|
|
|
// hmap h has been allocated on the stack already.
|
|
|
|
|
// h.hash0 = fastrand()
|
|
|
|
|
rand := mkcall("fastrand", types.Types[TUINT32], init)
|
|
|
|
|
hashsym := hmapType.Field(4).Sym // hmap.hash0 see reflect.go:hmap
|
|
|
|
|
a := nod(OAS, nodSym(ODOT, h, hashsym), rand)
|
2018-11-18 08:34:38 -08:00
|
|
|
a = typecheck(a, ctxStmt)
|
2017-09-02 18:46:59 +02:00
|
|
|
a = walkexpr(a, init)
|
|
|
|
|
init.Append(a)
|
2018-05-10 13:25:39 +02:00
|
|
|
n = convnop(h, t)
|
2017-09-02 18:46:59 +02:00
|
|
|
} else {
|
|
|
|
|
// Call runtime.makehmap to allocate an
|
|
|
|
|
// hmap on the heap and initialize hmap's hash0 field.
|
|
|
|
|
fn := syslook("makemap_small")
|
2018-04-24 13:53:35 -07:00
|
|
|
fn = substArgTypes(fn, t.Key(), t.Elem())
|
2017-09-02 18:46:59 +02:00
|
|
|
n = mkcall1(fn, n.Type, init)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if n.Esc != EscNone {
|
|
|
|
|
h = nodnil()
|
|
|
|
|
}
|
|
|
|
|
// Map initialization with a variable or large hint is
|
|
|
|
|
// more complicated. We therefore generate a call to
|
2019-09-08 19:36:13 +03:00
|
|
|
// runtime.makemap to initialize hmap and allocate the
|
2017-09-02 18:46:59 +02:00
|
|
|
// map buckets.
|
|
|
|
|
|
|
|
|
|
// When hint fits into int, use makemap instead of
|
|
|
|
|
// makemap64, which is faster and shorter on 32 bit platforms.
|
|
|
|
|
fnname := "makemap64"
|
|
|
|
|
argtype := types.Types[TINT64]
|
2017-08-14 10:16:21 +02:00
|
|
|
|
2017-09-02 18:46:59 +02:00
|
|
|
// Type checking guarantees that TIDEAL hint is positive and fits in an int.
|
|
|
|
|
// See checkmake call in TMAP case of OMAKE case in OpSwitch in typecheck1 function.
|
|
|
|
|
// The case of hint overflow when converting TUINT or TUINTPTR to TINT
|
|
|
|
|
// will be handled by the negative range checks in makemap during runtime.
|
2020-11-23 13:42:43 -08:00
|
|
|
if hint.Type.IsKind(TIDEAL) || hint.Type.Size() <= types.Types[TUINT].Size() {
|
2017-09-02 18:46:59 +02:00
|
|
|
fnname = "makemap"
|
|
|
|
|
argtype = types.Types[TINT]
|
|
|
|
|
}
|
2017-08-14 10:16:21 +02:00
|
|
|
|
2017-09-02 18:46:59 +02:00
|
|
|
fn := syslook(fnname)
|
2018-04-24 13:53:35 -07:00
|
|
|
fn = substArgTypes(fn, hmapType, t.Key(), t.Elem())
|
2017-09-02 18:46:59 +02:00
|
|
|
n = mkcall1(fn, n.Type, init, typename(n.Type), conv(hint, argtype), h)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OMAKESLICE:
|
2015-02-23 16:07:24 -05:00
|
|
|
l := n.Left
|
|
|
|
|
r := n.Right
|
2015-02-13 14:40:36 -05:00
|
|
|
if r == nil {
|
|
|
|
|
r = safeexpr(l, init)
|
|
|
|
|
l = r
|
|
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Type
|
2020-08-21 20:20:12 -07:00
|
|
|
if t.Elem().NotInHeap() {
|
2020-08-27 14:05:52 -07:00
|
|
|
yyerror("%v can't be allocated in Go; it is incomplete (or unallocatable)", t.Elem())
|
2020-08-21 20:20:12 -07:00
|
|
|
}
|
2015-05-20 15:16:34 -04:00
|
|
|
if n.Esc == EscNone {
|
2020-10-01 12:03:27 +02:00
|
|
|
if why := heapAllocReason(n); why != "" {
|
|
|
|
|
Fatalf("%v has EscNone, but %v", n, why)
|
2015-05-20 15:16:34 -04:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
// var arr [r]T
|
|
|
|
|
// n = arr[:l]
|
2018-11-28 14:34:45 -08:00
|
|
|
i := indexconst(r)
|
|
|
|
|
if i < 0 {
|
|
|
|
|
Fatalf("walkexpr: invalid index %v", r)
|
|
|
|
|
}
|
2020-03-29 01:19:50 +07:00
|
|
|
|
2020-04-11 21:49:10 +07:00
|
|
|
// cap is constrained to [0,2^31) or [0,2^63) depending on whether
|
|
|
|
|
// we're in 32-bit or 64-bit systems. So it's safe to do:
|
2020-04-01 05:27:49 +07:00
|
|
|
//
|
|
|
|
|
// if uint64(len) > cap {
|
|
|
|
|
// if len < 0 { panicmakeslicelen() }
|
|
|
|
|
// panicmakeslicecap()
|
|
|
|
|
// }
|
|
|
|
|
nif := nod(OIF, nod(OGT, conv(l, types.Types[TUINT64]), nodintconst(i)), nil)
|
|
|
|
|
niflen := nod(OIF, nod(OLT, l, nodintconst(0)), nil)
|
|
|
|
|
niflen.Nbody.Set1(mkcall("panicmakeslicelen", nil, init))
|
|
|
|
|
nif.Nbody.Append(niflen, mkcall("panicmakeslicecap", nil, init))
|
2020-03-29 01:19:50 +07:00
|
|
|
nif = typecheck(nif, ctxStmt)
|
|
|
|
|
init.Append(nif)
|
|
|
|
|
|
2018-11-28 14:34:45 -08:00
|
|
|
t = types.NewArray(t.Elem(), i) // [r]T
|
2015-02-23 16:07:24 -05:00
|
|
|
var_ := temp(t)
|
2016-09-16 11:00:54 +10:00
|
|
|
a := nod(OAS, var_, nil) // zero temp
|
2018-11-18 08:34:38 -08:00
|
|
|
a = typecheck(a, ctxStmt)
|
2016-03-07 22:54:46 -08:00
|
|
|
init.Append(a)
|
2016-09-16 11:00:54 +10:00
|
|
|
r := nod(OSLICE, var_, nil) // arr[:l]
|
2016-04-21 11:55:33 -07:00
|
|
|
r.SetSliceBounds(nil, l, nil)
|
|
|
|
|
r = conv(r, n.Type) // in case n.Type is named.
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxExpr)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = walkexpr(r, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
n = r
|
|
|
|
|
} else {
|
2016-08-25 14:17:52 +02:00
|
|
|
// n escapes; set up a call to makeslice.
|
|
|
|
|
// When len and cap can fit into int, use makeslice instead of
|
|
|
|
|
// makeslice64, which is faster and shorter on 32 bit platforms.
|
|
|
|
|
|
|
|
|
|
len, cap := l, r
|
|
|
|
|
|
|
|
|
|
fnname := "makeslice64"
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
argtype := types.Types[TINT64]
|
2016-08-25 14:17:52 +02:00
|
|
|
|
2017-08-14 10:35:17 +02:00
|
|
|
// Type checking guarantees that TIDEAL len/cap are positive and fit in an int.
|
2016-08-25 14:17:52 +02:00
|
|
|
// The case of len or cap overflow when converting TUINT or TUINTPTR to TINT
|
|
|
|
|
// will be handled by the negative range checks in makeslice during runtime.
|
2020-11-23 13:42:43 -08:00
|
|
|
if (len.Type.IsKind(TIDEAL) || len.Type.Size() <= types.Types[TUINT].Size()) &&
|
|
|
|
|
(cap.Type.IsKind(TIDEAL) || cap.Type.Size() <= types.Types[TUINT].Size()) {
|
2016-08-25 14:17:52 +02:00
|
|
|
fnname = "makeslice"
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
argtype = types.Types[TINT]
|
2016-08-25 14:17:52 +02:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-02 14:38:33 +01:00
|
|
|
m := nod(OSLICEHEADER, nil, nil)
|
|
|
|
|
m.Type = t
|
|
|
|
|
|
2016-08-25 14:17:52 +02:00
|
|
|
fn := syslook(fnname)
|
2018-11-02 14:38:33 +01:00
|
|
|
m.Left = mkcall1(fn, types.Types[TUNSAFEPTR], init, typename(t.Elem()), conv(len, argtype), conv(cap, argtype))
|
2020-04-16 21:25:15 -07:00
|
|
|
m.Left.MarkNonNil()
|
2018-11-02 14:38:33 +01:00
|
|
|
m.List.Set2(conv(len, types.Types[TINT]), conv(cap, types.Types[TINT]))
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
m = typecheck(m, ctxExpr)
|
2018-11-02 14:38:33 +01:00
|
|
|
m = walkexpr(m, init)
|
|
|
|
|
n = m
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-10-23 13:50:07 +02:00
|
|
|
case OMAKESLICECOPY:
|
|
|
|
|
if n.Esc == EscNone {
|
|
|
|
|
Fatalf("OMAKESLICECOPY with EscNone: %v", n)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
t := n.Type
|
|
|
|
|
if t.Elem().NotInHeap() {
|
2020-08-27 14:05:52 -07:00
|
|
|
yyerror("%v can't be allocated in Go; it is incomplete (or unallocatable)", t.Elem())
|
2018-10-23 13:50:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
length := conv(n.Left, types.Types[TINT])
|
|
|
|
|
copylen := nod(OLEN, n.Right, nil)
|
|
|
|
|
copyptr := nod(OSPTR, n.Right, nil)
|
|
|
|
|
|
2020-08-22 14:07:30 -07:00
|
|
|
if !t.Elem().HasPointers() && n.Bounded() {
|
2018-10-23 13:50:07 +02:00
|
|
|
// When len(to)==len(from) and elements have no pointers:
|
|
|
|
|
// replace make+copy with runtime.mallocgc+runtime.memmove.
|
|
|
|
|
|
|
|
|
|
// We do not check for overflow of len(to)*elem.Width here
|
|
|
|
|
// since len(from) is an existing checked slice capacity
|
|
|
|
|
// with same elem.Width for the from slice.
|
|
|
|
|
size := nod(OMUL, conv(length, types.Types[TUINTPTR]), conv(nodintconst(t.Elem().Width), types.Types[TUINTPTR]))
|
|
|
|
|
|
|
|
|
|
// instantiate mallocgc(size uintptr, typ *byte, needszero bool) unsafe.Pointer
|
|
|
|
|
fn := syslook("mallocgc")
|
|
|
|
|
sh := nod(OSLICEHEADER, nil, nil)
|
|
|
|
|
sh.Left = mkcall1(fn, types.Types[TUNSAFEPTR], init, size, nodnil(), nodbool(false))
|
|
|
|
|
sh.Left.MarkNonNil()
|
|
|
|
|
sh.List.Set2(length, length)
|
|
|
|
|
sh.Type = t
|
|
|
|
|
|
|
|
|
|
s := temp(t)
|
|
|
|
|
r := typecheck(nod(OAS, s, sh), ctxStmt)
|
|
|
|
|
r = walkexpr(r, init)
|
|
|
|
|
init.Append(r)
|
|
|
|
|
|
|
|
|
|
// instantiate memmove(to *any, frm *any, size uintptr)
|
|
|
|
|
fn = syslook("memmove")
|
|
|
|
|
fn = substArgTypes(fn, t.Elem(), t.Elem())
|
|
|
|
|
ncopy := mkcall1(fn, nil, init, nod(OSPTR, s, nil), copyptr, size)
|
|
|
|
|
ncopy = typecheck(ncopy, ctxStmt)
|
|
|
|
|
ncopy = walkexpr(ncopy, init)
|
|
|
|
|
init.Append(ncopy)
|
|
|
|
|
|
|
|
|
|
n = s
|
|
|
|
|
} else { // Replace make+copy with runtime.makeslicecopy.
|
|
|
|
|
// instantiate makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer
|
|
|
|
|
fn := syslook("makeslicecopy")
|
|
|
|
|
s := nod(OSLICEHEADER, nil, nil)
|
|
|
|
|
s.Left = mkcall1(fn, types.Types[TUNSAFEPTR], init, typename(t.Elem()), length, copylen, conv(copyptr, types.Types[TUNSAFEPTR]))
|
|
|
|
|
s.Left.MarkNonNil()
|
|
|
|
|
s.List.Set2(length, length)
|
|
|
|
|
s.Type = t
|
|
|
|
|
n = typecheck(s, ctxExpr)
|
|
|
|
|
n = walkexpr(n, init)
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
case ORUNESTR:
|
2015-02-23 16:07:24 -05:00
|
|
|
a := nodnil()
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Esc == EscNone {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewArray(types.Types[TUINT8], 4)
|
2019-04-22 17:04:59 -07:00
|
|
|
a = nod(OADDR, temp(t), nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
// intstring(*[4]byte, rune)
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = mkcall("intstring", n.Type, init, a, conv(n.Left, types.Types[TINT64]))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-04-22 17:04:59 -07:00
|
|
|
case OBYTES2STR, ORUNES2STR:
|
2015-02-23 16:07:24 -05:00
|
|
|
a := nodnil()
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Esc == EscNone {
|
|
|
|
|
// Create temporary buffer for string on stack.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
|
2016-09-16 11:00:54 +10:00
|
|
|
a = nod(OADDR, temp(t), nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2019-04-22 17:04:59 -07:00
|
|
|
if n.Op == ORUNES2STR {
|
2020-01-31 21:01:55 -08:00
|
|
|
// slicerunetostring(*[32]byte, []rune) string
|
|
|
|
|
n = mkcall("slicerunetostring", n.Type, init, a, n.Left)
|
|
|
|
|
} else {
|
|
|
|
|
// slicebytetostring(*[32]byte, ptr *byte, n int) string
|
|
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2020-09-14 16:30:43 +02:00
|
|
|
ptr, len := n.Left.backingArrayPtrLen()
|
2020-01-31 21:01:55 -08:00
|
|
|
n = mkcall("slicebytetostring", n.Type, init, a, ptr, len)
|
2019-04-22 17:04:59 -07:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
case OBYTES2STRTMP:
|
2016-09-10 22:44:00 +02:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
if !instrumenting {
|
2018-11-18 08:34:38 -08:00
|
|
|
// Let the backend handle OBYTES2STRTMP directly
|
2016-09-10 22:44:00 +02:00
|
|
|
// to avoid a function call to slicebytetostringtmp.
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-01-31 21:01:55 -08:00
|
|
|
// slicebytetostringtmp(ptr *byte, n int) string
|
|
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2020-09-14 16:30:43 +02:00
|
|
|
ptr, len := n.Left.backingArrayPtrLen()
|
2020-01-31 21:01:55 -08:00
|
|
|
n = mkcall("slicebytetostringtmp", n.Type, init, ptr, len)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
case OSTR2BYTES:
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
s := n.Left
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(s, constant.String) {
|
2020-10-12 15:02:59 +02:00
|
|
|
sc := s.StringVal()
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
|
|
|
|
|
// Allocate a [n]byte of the right size.
|
|
|
|
|
t := types.NewArray(types.Types[TUINT8], int64(len(sc)))
|
|
|
|
|
var a *Node
|
2019-06-05 14:53:28 -04:00
|
|
|
if n.Esc == EscNone && len(sc) <= int(maxImplicitStackVarSize) {
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
a = nod(OADDR, temp(t), nil)
|
|
|
|
|
} else {
|
|
|
|
|
a = callnew(t)
|
|
|
|
|
}
|
|
|
|
|
p := temp(t.PtrTo()) // *[n]byte
|
2018-11-18 08:34:38 -08:00
|
|
|
init.Append(typecheck(nod(OAS, p, a), ctxStmt))
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
|
|
|
|
|
// Copy from the static string data to the [n]byte.
|
|
|
|
|
if len(sc) > 0 {
|
|
|
|
|
as := nod(OAS,
|
2018-11-18 08:34:38 -08:00
|
|
|
nod(ODEREF, p, nil),
|
|
|
|
|
nod(ODEREF, convnop(nod(OSPTR, s, nil), t.PtrTo()), nil))
|
|
|
|
|
as = typecheck(as, ctxStmt)
|
2018-10-13 15:48:17 -07:00
|
|
|
as = walkstmt(as)
|
|
|
|
|
init.Append(as)
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Slice the [n]byte to a []byte.
|
|
|
|
|
n.Op = OSLICEARR
|
|
|
|
|
n.Left = p
|
|
|
|
|
n = walkexpr(n, init)
|
|
|
|
|
break
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2019-04-22 17:04:59 -07:00
|
|
|
a := nodnil()
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Esc == EscNone {
|
|
|
|
|
// Create temporary buffer for slice on stack.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
|
2016-09-16 11:00:54 +10:00
|
|
|
a = nod(OADDR, temp(t), nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2019-04-22 17:04:59 -07:00
|
|
|
// stringtoslicebyte(*32[byte], string) []byte
|
cmd/compile: make []byte("...") more efficient
Do []byte(string) conversions more efficiently when the string
is a constant. Instead of calling stringtobyteslice, allocate
just the space we need and encode the initialization directly.
[]byte("foo") rewrites to the following pseudocode:
var s [3]byte // on heap or stack, depending on whether b escapes
s = *(*[3]byte)(&"foo"[0]) // initialize s from the string
b = s[:]
which generates this assembly:
0x001d 00029 (tmp1.go:9) LEAQ type.[3]uint8(SB), AX
0x0024 00036 (tmp1.go:9) MOVQ AX, (SP)
0x0028 00040 (tmp1.go:9) CALL runtime.newobject(SB)
0x002d 00045 (tmp1.go:9) MOVQ 8(SP), AX
0x0032 00050 (tmp1.go:9) MOVBLZX go.string."foo"+2(SB), CX
0x0039 00057 (tmp1.go:9) MOVWLZX go.string."foo"(SB), DX
0x0040 00064 (tmp1.go:9) MOVW DX, (AX)
0x0043 00067 (tmp1.go:9) MOVB CL, 2(AX)
// Then the slice is b = {AX, 3, 3}
The generated code is still not optimal, as it still does load/store
from read-only memory instead of constant stores. Next CL...
Update #26498
Fixes #10170
Change-Id: I4b990b19f9a308f60c8f4f148934acffefe0a5bd
Reviewed-on: https://go-review.googlesource.com/c/140698
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-08 17:46:45 -07:00
|
|
|
n = mkcall("stringtoslicebyte", n.Type, init, a, conv(s, types.Types[TSTRING]))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
case OSTR2BYTESTMP:
|
2016-10-27 23:31:38 +02:00
|
|
|
// []byte(string) conversion that creates a slice
|
|
|
|
|
// referring to the actual string bytes.
|
|
|
|
|
// This conversion is handled later by the backend and
|
|
|
|
|
// is only for use by internal compiler optimizations
|
|
|
|
|
// that know that the slice won't be mutated.
|
|
|
|
|
// The only such case today is:
|
|
|
|
|
// for i, c := range []byte(string)
|
|
|
|
|
n.Left = walkexpr(n.Left, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
case OSTR2RUNES:
|
2015-02-23 16:07:24 -05:00
|
|
|
a := nodnil()
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Esc == EscNone {
|
|
|
|
|
// Create temporary buffer for slice on stack.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewArray(types.Types[TINT32], tmpstringbufsize)
|
2016-09-16 11:00:54 +10:00
|
|
|
a = nod(OADDR, temp(t), nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2019-04-22 17:04:59 -07:00
|
|
|
// stringtoslicerune(*[32]rune, string) []rune
|
2018-01-14 04:05:21 +09:00
|
|
|
n = mkcall("stringtoslicerune", n.Type, init, a, conv(n.Left, types.Types[TSTRING]))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-06-19 07:20:28 -07:00
|
|
|
case OARRAYLIT, OSLICELIT, OMAPLIT, OSTRUCTLIT, OPTRLIT:
|
2017-05-10 12:48:17 -07:00
|
|
|
if isStaticCompositeLiteral(n) && !canSSAType(n.Type) {
|
2016-04-18 11:17:55 -07:00
|
|
|
// n can be directly represented in the read-only data section.
|
|
|
|
|
// Make direct reference to the static data. See issue 12841.
|
2020-07-15 20:01:32 -04:00
|
|
|
vstat := readonlystaticname(n.Type)
|
2016-06-19 14:12:59 -07:00
|
|
|
fixedlit(inInitFunction, initKindStatic, n, vstat, init)
|
2016-04-18 11:17:55 -07:00
|
|
|
n = vstat
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxExpr)
|
2016-04-18 11:17:55 -07:00
|
|
|
break
|
|
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
var_ := temp(n.Type)
|
2016-09-04 09:34:03 -07:00
|
|
|
anylit(n, var_, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
n = var_
|
|
|
|
|
|
|
|
|
|
case OSEND:
|
2015-02-23 16:07:24 -05:00
|
|
|
n1 := n.Right
|
2016-03-30 10:57:47 -07:00
|
|
|
n1 = assignconv(n1, n.Left.Type.Elem(), "chan send")
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n1 = walkexpr(n1, init)
|
2016-09-16 11:00:54 +10:00
|
|
|
n1 = nod(OADDR, n1, nil)
|
2017-03-18 15:55:41 +00:00
|
|
|
n = mkcall1(chanfn("chansend1", 2, n.Left.Type), nil, init, n.Left, n1)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OCLOSURE:
|
|
|
|
|
n = walkclosure(n, init)
|
|
|
|
|
|
|
|
|
|
case OCALLPART:
|
|
|
|
|
n = walkpartialcall(n, init)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Expressions that are constant at run time but not
|
|
|
|
|
// considered const by the language spec are not turned into
|
|
|
|
|
// constants until walk. For example, if n is y%1 == 0, the
|
|
|
|
|
// walk of y%1 may have replaced it by 0.
|
|
|
|
|
// Check whether n with its updated args is itself now a constant.
|
2015-02-23 16:07:24 -05:00
|
|
|
t := n.Type
|
2015-02-13 14:40:36 -05:00
|
|
|
evconst(n)
|
2017-03-24 13:30:19 -07:00
|
|
|
if n.Type != t {
|
|
|
|
|
Fatalf("evconst changed Type: %v had type %v, now %v", n, t, n.Type)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Op == OLITERAL {
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxExpr)
|
2017-04-13 06:11:36 -07:00
|
|
|
// Emit string symbol now to avoid emitting
|
|
|
|
|
// any concurrently during the backend.
|
|
|
|
|
if s, ok := n.Val().U.(string); ok {
|
2017-11-08 09:43:56 +01:00
|
|
|
_ = stringsym(n.Pos, s)
|
2017-04-13 06:11:36 -07:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-03 13:38:49 -08:00
|
|
|
updateHasCall(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.w != 0 && n != nil {
|
2018-05-24 13:31:40 -07:00
|
|
|
Dump("after walk expr", n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lineno = lno
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
[dev.link] cmd/compile, cmd/link: remove dead methods if type is not used in interface
Currently, a method of a reachable type is live if it matches a
method of a reachable interface. In fact, we only need to retain
the method if the type is actually converted to an interface. If
the type is never converted to an interface, there is no way to
call the method through an interface method call (but the type
descriptor could still be used, e.g. in calling
runtime.newobject).
A type can be used in an interface in two ways:
- directly converted to interface. (Any interface counts, as it
is possible to convert one interface to another.)
- obtained by reflection from a related type (e.g. obtaining an
interface of T from []T).
For the former, we let the compiler emit a marker on the type
descriptor symbol when it is converted to an interface. In the
linker, we only need to check methods of marked types.
For the latter, when the linker visits a marked type, it needs to
visit all its "child" types as marked (i.e. potentially could be
converted to interface).
This reduces binary size:
cmd/compile 18792016 18706096 (-0.5%)
cmd/go 14120572 13398948 (-5.1%)
Change-Id: I4465c7eeabf575f4dc84017214c610fa05ae31fd
Reviewed-on: https://go-review.googlesource.com/c/go/+/237298
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
2020-06-08 18:38:59 -04:00
|
|
|
// markTypeUsedInInterface marks that type t is converted to an interface.
|
|
|
|
|
// This information is used in the linker in dead method elimination.
|
2020-09-20 23:29:20 -04:00
|
|
|
func markTypeUsedInInterface(t *types.Type, from *obj.LSym) {
|
|
|
|
|
tsym := typenamesym(t).Linksym()
|
|
|
|
|
// Emit a marker relocation. The linker will know the type is converted
|
|
|
|
|
// to an interface if "from" is reachable.
|
|
|
|
|
r := obj.Addrel(from)
|
|
|
|
|
r.Sym = tsym
|
|
|
|
|
r.Type = objabi.R_USEIFACE
|
[dev.link] cmd/compile, cmd/link: remove dead methods if type is not used in interface
Currently, a method of a reachable type is live if it matches a
method of a reachable interface. In fact, we only need to retain
the method if the type is actually converted to an interface. If
the type is never converted to an interface, there is no way to
call the method through an interface method call (but the type
descriptor could still be used, e.g. in calling
runtime.newobject).
A type can be used in an interface in two ways:
- directly converted to interface. (Any interface counts, as it
is possible to convert one interface to another.)
- obtained by reflection from a related type (e.g. obtaining an
interface of T from []T).
For the former, we let the compiler emit a marker on the type
descriptor symbol when it is converted to an interface. In the
linker, we only need to check methods of marked types.
For the latter, when the linker visits a marked type, it needs to
visit all its "child" types as marked (i.e. potentially could be
converted to interface).
This reduces binary size:
cmd/compile 18792016 18706096 (-0.5%)
cmd/go 14120572 13398948 (-5.1%)
Change-Id: I4465c7eeabf575f4dc84017214c610fa05ae31fd
Reviewed-on: https://go-review.googlesource.com/c/go/+/237298
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
2020-06-08 18:38:59 -04:00
|
|
|
}
|
|
|
|
|
|
2020-09-21 20:44:53 -04:00
|
|
|
// markUsedIfaceMethod marks that an interface method is used in the current
|
|
|
|
|
// function. n is OCALLINTER node.
|
|
|
|
|
func markUsedIfaceMethod(n *Node) {
|
|
|
|
|
ityp := n.Left.Left.Type
|
|
|
|
|
tsym := typenamesym(ityp).Linksym()
|
|
|
|
|
r := obj.Addrel(Curfn.Func.lsym)
|
|
|
|
|
r.Sym = tsym
|
|
|
|
|
// n.Left.Xoffset is the method index * Widthptr (the offset of code pointer
|
|
|
|
|
// in itab).
|
|
|
|
|
midx := n.Left.Xoffset / int64(Widthptr)
|
|
|
|
|
r.Add = ifaceMethodOffset(ityp, midx)
|
|
|
|
|
r.Type = objabi.R_USEIFACEMETHOD
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-20 21:31:49 +01:00
|
|
|
// rtconvfn returns the parameter and result types that will be used by a
|
|
|
|
|
// runtime function to convert from type src to type dst. The runtime function
|
|
|
|
|
// name can be derived from the names of the returned types.
|
|
|
|
|
//
|
|
|
|
|
// If no such function is necessary, it returns (Txxx, Txxx).
|
|
|
|
|
func rtconvfn(src, dst *types.Type) (param, result types.EType) {
|
|
|
|
|
if thearch.SoftFloat {
|
|
|
|
|
return Txxx, Txxx
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch thearch.LinkArch.Family {
|
|
|
|
|
case sys.ARM, sys.MIPS:
|
|
|
|
|
if src.IsFloat() {
|
|
|
|
|
switch dst.Etype {
|
|
|
|
|
case TINT64, TUINT64:
|
|
|
|
|
return TFLOAT64, dst.Etype
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if dst.IsFloat() {
|
|
|
|
|
switch src.Etype {
|
|
|
|
|
case TINT64, TUINT64:
|
|
|
|
|
return src.Etype, TFLOAT64
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case sys.I386:
|
|
|
|
|
if src.IsFloat() {
|
|
|
|
|
switch dst.Etype {
|
|
|
|
|
case TINT64, TUINT64:
|
|
|
|
|
return TFLOAT64, dst.Etype
|
|
|
|
|
case TUINT32, TUINT, TUINTPTR:
|
|
|
|
|
return TFLOAT64, TUINT32
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if dst.IsFloat() {
|
|
|
|
|
switch src.Etype {
|
|
|
|
|
case TINT64, TUINT64:
|
|
|
|
|
return src.Etype, TFLOAT64
|
|
|
|
|
case TUINT32, TUINT, TUINTPTR:
|
|
|
|
|
return TUINT32, TFLOAT64
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return Txxx, Txxx
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-21 11:55:33 -07:00
|
|
|
// TODO(josharian): combine this with its caller and simplify
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
func reduceSlice(n *Node) *Node {
|
2016-04-21 11:55:33 -07:00
|
|
|
low, high, max := n.SliceBounds()
|
|
|
|
|
if high != nil && high.Op == OLEN && samesafeexpr(n.Left, high.Left) {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
// Reduce x[i:len(x)] to x[i:].
|
2016-04-21 11:55:33 -07:00
|
|
|
high = nil
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
2016-04-21 11:55:33 -07:00
|
|
|
n.SetSliceBounds(low, high, max)
|
|
|
|
|
if (n.Op == OSLICE || n.Op == OSLICESTR) && low == nil && high == nil {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
// Reduce x[:] to x.
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: omit slice operation")
|
|
|
|
|
}
|
|
|
|
|
return n.Left
|
|
|
|
|
}
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-09 16:13:09 +09:00
|
|
|
func ascompatee1(l *Node, r *Node, init *Nodes) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
// convas will turn map assigns into function calls,
|
|
|
|
|
// making it impossible for reorder3 to work.
|
2016-09-16 11:00:54 +10:00
|
|
|
n := nod(OAS, l, r)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
if l.Op == OINDEXMAP {
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return convas(n, init)
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-08 10:26:20 -08:00
|
|
|
func ascompatee(op Op, nl, nr []*Node, init *Nodes) []*Node {
|
2015-10-22 09:51:12 +09:00
|
|
|
// check assign expression list to
|
2017-09-15 10:24:47 +09:00
|
|
|
// an expression list. called in
|
2015-10-22 09:51:12 +09:00
|
|
|
// expr-list = expr-list
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// ensure order of evaluation for function calls
|
2016-03-08 15:10:26 -08:00
|
|
|
for i := range nl {
|
|
|
|
|
nl[i] = safeexpr(nl[i], init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-08 15:10:26 -08:00
|
|
|
for i1 := range nr {
|
|
|
|
|
nr[i1] = safeexpr(nr[i1], init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
var nn []*Node
|
2016-03-09 12:39:36 -08:00
|
|
|
i := 0
|
|
|
|
|
for ; i < len(nl); i++ {
|
|
|
|
|
if i >= len(nr) {
|
|
|
|
|
break
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
// Do not generate 'x = x' during return. See issue 4014.
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if op == ORETURN && samesafeexpr(nl[i], nr[i]) {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2017-08-09 16:13:09 +09:00
|
|
|
nn = append(nn, ascompatee1(nl[i], nr[i], init))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// cannot happen: caller checked that lists had same length
|
2016-03-09 12:39:36 -08:00
|
|
|
if i < len(nl) || i < len(nr) {
|
2016-03-08 10:26:20 -08:00
|
|
|
var nln, nrn Nodes
|
|
|
|
|
nln.Set(nl)
|
|
|
|
|
nrn.Set(nr)
|
2017-04-23 05:10:21 -07:00
|
|
|
Fatalf("error in shape across %+v %v %+v / %d %d [%s]", nln, op, nrn, len(nl), len(nr), Curfn.funcname())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
return nn
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-24 13:31:03 -07:00
|
|
|
// fncall reports whether assigning an rvalue of type rt to an lvalue l might involve a function call.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func fncall(l *Node, rt *types.Type) bool {
|
2017-03-03 13:38:49 -08:00
|
|
|
if l.HasCall() || l.Op == OINDEXMAP {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-10-18 15:24:50 -07:00
|
|
|
if types.Identical(l.Type, rt) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-05-24 13:31:03 -07:00
|
|
|
// There might be a conversion required, which might involve a runtime call.
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-09 19:32:10 -08:00
|
|
|
// check assign type list to
|
2017-09-15 10:24:47 +09:00
|
|
|
// an expression list. called in
|
2016-03-09 19:32:10 -08:00
|
|
|
// expr-list = func()
|
2017-08-09 16:13:09 +09:00
|
|
|
func ascompatet(nl Nodes, nr *types.Type) []*Node {
|
2017-04-05 19:38:21 -07:00
|
|
|
if nl.Len() != nr.NumFields() {
|
|
|
|
|
Fatalf("ascompatet: assignment count mismatch: %d = %d", nl.Len(), nr.NumFields())
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-10-27 09:23:48 -07:00
|
|
|
var nn, mm Nodes
|
2017-04-05 19:38:21 -07:00
|
|
|
for i, l := range nl.Slice() {
|
2018-04-08 13:39:10 +01:00
|
|
|
if l.isBlank() {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2017-04-05 19:38:21 -07:00
|
|
|
r := nr.Field(i)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-05-24 13:31:03 -07:00
|
|
|
// Any assignment to an lvalue that might cause a function call must be
|
|
|
|
|
// deferred until all the returned values have been read.
|
2015-02-17 22:13:49 -05:00
|
|
|
if fncall(l, r.Type) {
|
2016-03-09 19:32:10 -08:00
|
|
|
tmp := temp(r.Type)
|
2018-11-18 08:34:38 -08:00
|
|
|
tmp = typecheck(tmp, ctxExpr)
|
2016-09-16 11:00:54 +10:00
|
|
|
a := nod(OAS, l, tmp)
|
2016-10-27 09:23:48 -07:00
|
|
|
a = convas(a, &mm)
|
|
|
|
|
mm.Append(a)
|
2015-02-13 14:40:36 -05:00
|
|
|
l = tmp
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-05 16:43:08 -07:00
|
|
|
res := nod(ORESULT, nil, nil)
|
|
|
|
|
res.Xoffset = Ctxt.FixedFrameSize() + r.Offset
|
|
|
|
|
res.Type = r.Type
|
|
|
|
|
res.SetTypecheck(1)
|
|
|
|
|
|
|
|
|
|
a := nod(OAS, l, res)
|
2016-10-27 09:23:48 -07:00
|
|
|
a = convas(a, &nn)
|
2017-03-03 13:38:49 -08:00
|
|
|
updateHasCall(a)
|
|
|
|
|
if a.HasCall() {
|
2015-02-13 14:40:36 -05:00
|
|
|
Dump("ascompatet ucount", a)
|
2017-04-05 19:38:21 -07:00
|
|
|
Fatalf("ascompatet: too many function calls evaluating parameters")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-10-27 09:23:48 -07:00
|
|
|
nn.Append(a)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-10-27 09:23:48 -07:00
|
|
|
return append(nn.Slice(), mm.Slice()...)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// package all the arguments that match a ... T parameter into a []T.
|
2020-04-21 15:37:29 -07:00
|
|
|
func mkdotargslice(typ *types.Type, args []*Node) *Node {
|
|
|
|
|
var n *Node
|
2017-02-22 14:43:21 -08:00
|
|
|
if len(args) == 0 {
|
2020-04-21 15:37:29 -07:00
|
|
|
n = nodnil()
|
2017-02-22 14:43:21 -08:00
|
|
|
n.Type = typ
|
2020-04-21 15:37:29 -07:00
|
|
|
} else {
|
|
|
|
|
n = nod(OCOMPLIT, nil, typenod(typ))
|
|
|
|
|
n.List.Append(args...)
|
cmd/compile: use fixVariadicCall in escape analysis
This CL uses fixVariadicCall before escape analyzing function calls.
This has a number of benefits, though also some minor obstacles:
Most notably, it allows us to remove ODDDARG along with the logic
involved in setting it up, manipulating EscHoles, and later copying
its escape analysis flags to the actual slice argument. Instead, we
uniformly handle all variadic calls the same way. (E.g., issue31573.go
is updated because now f() and f(nil...) are handled identically.)
It also allows us to simplify handling of builtins and generic
function calls. Previously handling of calls was hairy enough to
require multiple dispatches on n.Op, whereas now the logic is uniform
enough that we can easily handle it with a single dispatch.
The downside is handling //go:uintptrescapes is now somewhat clumsy.
(It used to be clumsy, but it still is, too.) The proper fix here is
probably to stop using escape analysis tags for //go:uintptrescapes
and unsafe-uintptr, and have an earlier pass responsible for them.
Finally, note that while we now call fixVariadicCall in Escape, we
still have to call it in Order, because we don't (yet) run Escape on
all compiler-generated functions. In particular, the generated "init"
function for initializing package-level variables can contain calls to
variadic functions and isn't escape analyzed.
Passes toolstash-check -race.
Change-Id: I4cdb92a393ac487910aeee58a5cb8c1500eef881
Reviewed-on: https://go-review.googlesource.com/c/go/+/229759
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
2020-04-21 19:48:02 -07:00
|
|
|
n.SetImplicit(true)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxExpr)
|
2017-02-22 14:43:21 -08:00
|
|
|
if n.Type == nil {
|
|
|
|
|
Fatalf("mkdotargslice: typecheck failed")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-02-22 14:43:21 -08:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2020-04-21 15:37:29 -07:00
|
|
|
// fixVariadicCall rewrites calls to variadic functions to use an
|
|
|
|
|
// explicit ... argument if one is not already present.
|
cmd/compile: move fixVariadicCall from walk to order
This CL moves fixVariadicCall from mid-Walk of function calls to
early-Order, in preparation for moving it even earlier in the future.
Notably, rewriting variadic calls this early introduces two
compilation output changes:
1. Previously, Order visited the ODDDARG before the rest of the
arguments list, whereas the natural time to visit it is at the end of
the list (as we visit arguments left-to-right, and the ... argument is
the rightmost one). Changing this ordering permutes the autotmp
allocation order, which in turn permutes autotmp naming and stack
offsets.
2. Previously, Walk separately walked all of the variadic arguments
before walking the entire slice literal, whereas the more natural
thing to do is just walk the entire slice literal. This triggers
slightly different code paths for composite literal construction in
some cases.
Neither of these have semantic impact. They simply mean we're now
compiling f(a,b,c) the same way as we were already compiling
f([]T{a,b,c}...).
Change-Id: I40ccc5725697a116370111ebe746b2639562fe87
Reviewed-on: https://go-review.googlesource.com/c/go/+/229601
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
2020-04-21 15:59:11 -07:00
|
|
|
func fixVariadicCall(call *Node) {
|
2020-04-21 15:37:29 -07:00
|
|
|
fntype := call.Left.Type
|
|
|
|
|
if !fntype.IsVariadic() || call.IsDDD() {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
vi := fntype.NumParams() - 1
|
|
|
|
|
vt := fntype.Params().Field(vi).Type
|
|
|
|
|
|
|
|
|
|
args := call.List.Slice()
|
|
|
|
|
extra := args[vi:]
|
|
|
|
|
slice := mkdotargslice(vt, extra)
|
|
|
|
|
for i := range extra {
|
|
|
|
|
extra[i] = nil // allow GC
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
call.List.Set(append(args[:vi], slice))
|
|
|
|
|
call.SetIsDDD(true)
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
func walkCall(n *Node, init *Nodes) {
|
|
|
|
|
if n.Rlist.Len() != 0 {
|
|
|
|
|
return // already walked
|
2017-02-22 14:43:21 -08:00
|
|
|
}
|
2020-04-21 15:37:29 -07:00
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
params := n.Left.Type.Params()
|
|
|
|
|
args := n.List.Slice()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: move fixVariadicCall from walk to order
This CL moves fixVariadicCall from mid-Walk of function calls to
early-Order, in preparation for moving it even earlier in the future.
Notably, rewriting variadic calls this early introduces two
compilation output changes:
1. Previously, Order visited the ODDDARG before the rest of the
arguments list, whereas the natural time to visit it is at the end of
the list (as we visit arguments left-to-right, and the ... argument is
the rightmost one). Changing this ordering permutes the autotmp
allocation order, which in turn permutes autotmp naming and stack
offsets.
2. Previously, Walk separately walked all of the variadic arguments
before walking the entire slice literal, whereas the more natural
thing to do is just walk the entire slice literal. This triggers
slightly different code paths for composite literal construction in
some cases.
Neither of these have semantic impact. They simply mean we're now
compiling f(a,b,c) the same way as we were already compiling
f([]T{a,b,c}...).
Change-Id: I40ccc5725697a116370111ebe746b2639562fe87
Reviewed-on: https://go-review.googlesource.com/c/go/+/229601
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
2020-04-21 15:59:11 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
walkexprlist(args, init)
|
|
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
// If this is a method call, add the receiver at the beginning of the args.
|
|
|
|
|
if n.Op == OCALLMETH {
|
|
|
|
|
withRecv := make([]*Node, len(args)+1)
|
|
|
|
|
withRecv[0] = n.Left.Left
|
|
|
|
|
n.Left.Left = nil
|
|
|
|
|
copy(withRecv[1:], args)
|
|
|
|
|
args = withRecv
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
// For any argument whose evaluation might require a function call,
|
|
|
|
|
// store that argument into a temporary variable,
|
|
|
|
|
// to prevent that calls from clobbering arguments already on the stack.
|
|
|
|
|
// When instrumenting, all arguments might require function calls.
|
|
|
|
|
var tempAssigns []*Node
|
|
|
|
|
for i, arg := range args {
|
|
|
|
|
updateHasCall(arg)
|
2018-10-25 09:49:53 -07:00
|
|
|
// Determine param type.
|
|
|
|
|
var t *types.Type
|
|
|
|
|
if n.Op == OCALLMETH {
|
|
|
|
|
if i == 0 {
|
|
|
|
|
t = n.Left.Type.Recv().Type
|
|
|
|
|
} else {
|
|
|
|
|
t = params.Field(i - 1).Type
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
t = params.Field(i).Type
|
|
|
|
|
}
|
|
|
|
|
if instrumenting || fncall(arg, t) {
|
2019-09-06 17:55:35 +07:00
|
|
|
// make assignment of fncall to tempAt
|
2018-10-25 09:49:53 -07:00
|
|
|
tmp := temp(t)
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
a := nod(OAS, tmp, arg)
|
2018-10-25 09:49:53 -07:00
|
|
|
a = convas(a, init)
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
tempAssigns = append(tempAssigns, a)
|
|
|
|
|
// replace arg with temp
|
|
|
|
|
args[i] = tmp
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
n.List.Set(tempAssigns)
|
|
|
|
|
n.Rlist.Set(args)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// generate code for print
|
2016-03-07 22:54:46 -08:00
|
|
|
func walkprint(nn *Node, init *Nodes) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Hoist all the argument evaluation up before the lock.
|
2017-05-19 17:12:57 -07:00
|
|
|
walkexprlistcheap(nn.List.Slice(), init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-05-21 15:18:08 -07:00
|
|
|
// For println, add " " between elements and "\n" at the end.
|
|
|
|
|
if nn.Op == OPRINTN {
|
|
|
|
|
s := nn.List.Slice()
|
|
|
|
|
t := make([]*Node, 0, len(s)*2)
|
|
|
|
|
for i, n := range s {
|
2017-09-08 16:25:25 -07:00
|
|
|
if i != 0 {
|
2017-09-08 11:47:16 -07:00
|
|
|
t = append(t, nodstr(" "))
|
2017-05-21 15:18:08 -07:00
|
|
|
}
|
2017-09-08 16:25:25 -07:00
|
|
|
t = append(t, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-09-08 11:47:16 -07:00
|
|
|
t = append(t, nodstr("\n"))
|
2017-05-21 15:18:08 -07:00
|
|
|
nn.List.Set(t)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: collapse runs of string constants in walkprint
This reduces the code footprint of code like:
println("foo=", foo, "bar=", bar)
which is fairly common in the runtime.
Prior to this change, this makes function calls to print each of:
"foo=", " ", foo, " ", "bar=", " ", bar, "\n"
After this change, this prints:
"foo= ", foo, " bar= ", bar, "\n"
This shrinks the hello world binary by 0.4%.
More importantly, this improves the instruction
density of important runtime routines.
Change-Id: I8971bdf5382fbaaf4a82bad4442f9da07c28d395
Reviewed-on: https://go-review.googlesource.com/55098
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-05-22 06:40:06 -07:00
|
|
|
// Collapse runs of constant strings.
|
|
|
|
|
s := nn.List.Slice()
|
|
|
|
|
t := make([]*Node, 0, len(s))
|
|
|
|
|
for i := 0; i < len(s); {
|
|
|
|
|
var strs []string
|
2020-11-23 13:42:43 -08:00
|
|
|
for i < len(s) && Isconst(s[i], constant.String) {
|
2020-10-12 15:02:59 +02:00
|
|
|
strs = append(strs, s[i].StringVal())
|
cmd/compile: collapse runs of string constants in walkprint
This reduces the code footprint of code like:
println("foo=", foo, "bar=", bar)
which is fairly common in the runtime.
Prior to this change, this makes function calls to print each of:
"foo=", " ", foo, " ", "bar=", " ", bar, "\n"
After this change, this prints:
"foo= ", foo, " bar= ", bar, "\n"
This shrinks the hello world binary by 0.4%.
More importantly, this improves the instruction
density of important runtime routines.
Change-Id: I8971bdf5382fbaaf4a82bad4442f9da07c28d395
Reviewed-on: https://go-review.googlesource.com/55098
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-05-22 06:40:06 -07:00
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
if len(strs) > 0 {
|
|
|
|
|
t = append(t, nodstr(strings.Join(strs, "")))
|
|
|
|
|
}
|
|
|
|
|
if i < len(s) {
|
|
|
|
|
t = append(t, s[i])
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
nn.List.Set(t)
|
|
|
|
|
|
2017-05-21 15:18:08 -07:00
|
|
|
calls := []*Node{mkcall("printlock", nil, init)}
|
|
|
|
|
for i, n := range nn.List.Slice() {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Op == OLITERAL {
|
2020-11-13 18:33:19 -08:00
|
|
|
if n.Type == types.UntypedRune {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = defaultlit(n, types.Runetype)
|
2020-11-13 18:33:19 -08:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-11-23 13:42:43 -08:00
|
|
|
switch n.Val().Kind() {
|
|
|
|
|
case constant.Int:
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = defaultlit(n, types.Types[TINT64])
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-11-23 13:42:43 -08:00
|
|
|
case constant.Float:
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = defaultlit(n, types.Types[TFLOAT64])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Op != OLITERAL && n.Type != nil && n.Type.Etype == TIDEAL {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
n = defaultlit(n, types.Types[TINT64])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = defaultlit(n, nil)
|
2017-05-19 17:18:50 -07:00
|
|
|
nn.List.SetIndex(i, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Type == nil || n.Type.Etype == TFORW {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-19 17:12:57 -07:00
|
|
|
var on *Node
|
2017-05-19 17:18:50 -07:00
|
|
|
switch n.Type.Etype {
|
|
|
|
|
case TINTER:
|
2016-04-01 13:36:24 -07:00
|
|
|
if n.Type.IsEmptyInterface() {
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printeface")
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printiface")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
on = substArgTypes(on, n.Type) // any-1
|
2020-10-27 14:15:00 -07:00
|
|
|
case TPTR:
|
|
|
|
|
if n.Type.Elem().NotInHeap() {
|
|
|
|
|
on = syslook("printuintptr")
|
|
|
|
|
n = nod(OCONV, n, nil)
|
|
|
|
|
n.Type = types.Types[TUNSAFEPTR]
|
|
|
|
|
n = nod(OCONV, n, nil)
|
|
|
|
|
n.Type = types.Types[TUINTPTR]
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
case TCHAN, TMAP, TFUNC, TUNSAFEPTR:
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printpointer")
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
on = substArgTypes(on, n.Type) // any-1
|
2017-05-19 17:18:50 -07:00
|
|
|
case TSLICE:
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printslice")
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
on = substArgTypes(on, n.Type) // any-1
|
2017-09-14 23:06:20 -07:00
|
|
|
case TUINT, TUINT8, TUINT16, TUINT32, TUINT64, TUINTPTR:
|
2017-05-19 17:18:50 -07:00
|
|
|
if isRuntimePkg(n.Type.Sym.Pkg) && n.Type.Sym.Name == "hex" {
|
|
|
|
|
on = syslook("printhex")
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2017-05-19 17:18:50 -07:00
|
|
|
on = syslook("printuint")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-09-14 23:06:20 -07:00
|
|
|
case TINT, TINT8, TINT16, TINT32, TINT64:
|
2017-05-19 17:18:50 -07:00
|
|
|
on = syslook("printint")
|
|
|
|
|
case TFLOAT32, TFLOAT64:
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printfloat")
|
2017-05-19 17:18:50 -07:00
|
|
|
case TCOMPLEX64, TCOMPLEX128:
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printcomplex")
|
2017-05-19 17:18:50 -07:00
|
|
|
case TBOOL:
|
2016-03-04 15:19:06 -08:00
|
|
|
on = syslook("printbool")
|
2017-05-19 17:18:50 -07:00
|
|
|
case TSTRING:
|
2017-05-21 15:18:08 -07:00
|
|
|
cs := ""
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(n, constant.String) {
|
2020-10-12 15:02:59 +02:00
|
|
|
cs = n.StringVal()
|
2017-05-21 15:18:08 -07:00
|
|
|
}
|
|
|
|
|
switch cs {
|
|
|
|
|
case " ":
|
|
|
|
|
on = syslook("printsp")
|
|
|
|
|
case "\n":
|
|
|
|
|
on = syslook("printnl")
|
|
|
|
|
default:
|
|
|
|
|
on = syslook("printstring")
|
|
|
|
|
}
|
2017-05-19 17:12:57 -07:00
|
|
|
default:
|
2015-02-13 14:40:36 -05:00
|
|
|
badtype(OPRINT, n.Type, nil)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-19 17:12:57 -07:00
|
|
|
r := nod(OCALL, on, nil)
|
2017-05-21 15:18:08 -07:00
|
|
|
if params := on.Type.Params().FieldSlice(); len(params) > 0 {
|
|
|
|
|
t := params[0].Type
|
2018-10-18 15:24:50 -07:00
|
|
|
if !types.Identical(t, n.Type) {
|
2017-05-21 15:18:08 -07:00
|
|
|
n = nod(OCONV, n, nil)
|
|
|
|
|
n.Type = t
|
|
|
|
|
}
|
|
|
|
|
r.List.Append(n)
|
|
|
|
|
}
|
2016-03-07 22:54:46 -08:00
|
|
|
calls = append(calls, r)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
calls = append(calls, mkcall("printunlock", nil, init))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
typecheckslice(calls, ctxStmt)
|
2015-02-13 14:40:36 -05:00
|
|
|
walkexprlist(calls, init)
|
|
|
|
|
|
2017-05-19 17:12:57 -07:00
|
|
|
r := nod(OEMPTY, nil, nil)
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxStmt)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = walkexpr(r, init)
|
2016-03-08 15:10:26 -08:00
|
|
|
r.Ninit.Set(calls)
|
2015-02-13 14:40:36 -05:00
|
|
|
return r
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func callnew(t *types.Type) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
dowidth(t)
|
2019-03-17 07:14:12 -07:00
|
|
|
n := nod(ONEWOBJ, typename(t), nil)
|
|
|
|
|
n.Type = types.NewPtr(t)
|
|
|
|
|
n.SetTypecheck(1)
|
2020-04-16 21:25:15 -07:00
|
|
|
n.MarkNonNil()
|
2019-03-17 07:14:12 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-03-01 21:46:08 -08:00
|
|
|
// isReflectHeaderDataField reports whether l is an expression p.Data
|
|
|
|
|
// where p has type reflect.SliceHeader or reflect.StringHeader.
|
|
|
|
|
func isReflectHeaderDataField(l *Node) bool {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
if l.Type != types.Types[TUINTPTR] {
|
2017-03-01 21:46:08 -08:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
var tsym *types.Sym
|
2017-03-01 21:46:08 -08:00
|
|
|
switch l.Op {
|
|
|
|
|
case ODOT:
|
|
|
|
|
tsym = l.Left.Type.Sym
|
|
|
|
|
case ODOTPTR:
|
|
|
|
|
tsym = l.Left.Type.Elem().Sym
|
|
|
|
|
default:
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if tsym == nil || l.Sym.Name != "Data" || tsym.Pkg.Path != "reflect" {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return tsym.Name == "SliceHeader" || tsym.Name == "StringHeader"
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
func convas(n *Node, init *Nodes) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Op != OAS {
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("convas: not OAS %v", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-10-20 11:56:31 +01:00
|
|
|
defer updateHasCall(n)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-04-25 18:02:43 -07:00
|
|
|
n.SetTypecheck(1)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
if n.Left == nil || n.Right == nil {
|
2017-10-20 11:56:31 +01:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-10-20 11:56:31 +01:00
|
|
|
lt := n.Left.Type
|
|
|
|
|
rt := n.Right.Type
|
2015-02-13 14:40:36 -05:00
|
|
|
if lt == nil || rt == nil {
|
2017-10-20 11:56:31 +01:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-04-08 13:39:10 +01:00
|
|
|
if n.Left.isBlank() {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right = defaultlit(n.Right, nil)
|
2017-10-20 11:56:31 +01:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-10-18 15:24:50 -07:00
|
|
|
if !types.Identical(lt, rt) {
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Right = assignconv(n.Right, lt, "assignment")
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2017-03-28 12:10:38 -07:00
|
|
|
dowidth(n.Right.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// from ascompat[ee]
|
|
|
|
|
// a,b = c,d
|
|
|
|
|
// simultaneous assignment. there cannot
|
|
|
|
|
// be later use of an earlier lvalue.
|
|
|
|
|
//
|
|
|
|
|
// function calls have been removed.
|
2016-03-07 22:54:46 -08:00
|
|
|
func reorder3(all []*Node) []*Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
// If a needed expression may be affected by an
|
|
|
|
|
// earlier assignment, make an early copy of that
|
|
|
|
|
// expression and use the copy instead.
|
2016-03-07 22:54:46 -08:00
|
|
|
var early []*Node
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
var mapinit Nodes
|
|
|
|
|
for i, n := range all {
|
2017-09-11 21:23:44 +02:00
|
|
|
l := n.Left
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Save subexpressions needed on left side.
|
|
|
|
|
// Drill through non-dereferences.
|
|
|
|
|
for {
|
|
|
|
|
if l.Op == ODOT || l.Op == OPAREN {
|
|
|
|
|
l = l.Left
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-30 14:45:47 -07:00
|
|
|
if l.Op == OINDEX && l.Left.Type.IsArray() {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
l.Right = reorder3save(l.Right, all, i, &early)
|
2015-02-13 14:40:36 -05:00
|
|
|
l = l.Left
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch l.Op {
|
|
|
|
|
default:
|
2016-04-27 19:34:17 +10:00
|
|
|
Fatalf("reorder3 unexpected lvalue %#v", l.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ONAME:
|
|
|
|
|
break
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OINDEX, OINDEXMAP:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
l.Left = reorder3save(l.Left, all, i, &early)
|
|
|
|
|
l.Right = reorder3save(l.Right, all, i, &early)
|
2015-02-13 14:40:36 -05:00
|
|
|
if l.Op == OINDEXMAP {
|
2016-03-07 22:54:46 -08:00
|
|
|
all[i] = convas(all[i], &mapinit)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
case ODEREF, ODOTPTR:
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
l.Left = reorder3save(l.Left, all, i, &early)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Save expression on right side.
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
all[i].Right = reorder3save(all[i].Right, all, i, &early)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
early = append(mapinit.Slice(), early...)
|
|
|
|
|
return append(early, all...)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// if the evaluation of *np would be affected by the
|
2015-06-10 15:31:53 -07:00
|
|
|
// assignments in all up to but not including the ith assignment,
|
2015-10-22 09:51:12 +09:00
|
|
|
// copy into a temporary during *early and
|
|
|
|
|
// replace *np with that temp.
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
// The result of reorder3save MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = reorder3save(n.Left, all, i, early)
|
|
|
|
|
func reorder3save(n *Node, all []*Node, i int, early *[]*Node) *Node {
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
if !aliased(n, all[:i]) {
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
q := temp(n.Type)
|
2016-09-16 11:00:54 +10:00
|
|
|
q = nod(OAS, q, n)
|
2018-11-18 08:34:38 -08:00
|
|
|
q = typecheck(q, ctxStmt)
|
2016-03-07 22:54:46 -08:00
|
|
|
*early = append(*early, q)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return q.Left
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// what's the outer value that a write to n affects?
|
|
|
|
|
// outer value means containing struct or array.
|
2015-02-13 14:40:36 -05:00
|
|
|
func outervalue(n *Node) *Node {
|
|
|
|
|
for {
|
2017-10-15 22:49:52 +01:00
|
|
|
switch n.Op {
|
|
|
|
|
case OXDOT:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("OXDOT in walk")
|
2017-10-15 22:49:52 +01:00
|
|
|
case ODOT, OPAREN, OCONVNOP:
|
2015-02-13 14:40:36 -05:00
|
|
|
n = n.Left
|
|
|
|
|
continue
|
2017-10-15 22:49:52 +01:00
|
|
|
case OINDEX:
|
|
|
|
|
if n.Left.Type != nil && n.Left.Type.IsArray() {
|
|
|
|
|
n = n.Left
|
|
|
|
|
continue
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-10-20 11:56:31 +01:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
// Is it possible that the computation of r might be
|
|
|
|
|
// affected by assignments in all?
|
|
|
|
|
func aliased(r *Node, all []*Node) bool {
|
|
|
|
|
if r == nil {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-31 13:58:33 -07:00
|
|
|
// Treat all fields of a struct as referring to the whole struct.
|
|
|
|
|
// We could do better but we would have to keep track of the fields.
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
for r.Op == ODOT {
|
|
|
|
|
r = r.Left
|
2016-03-31 13:58:33 -07:00
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Look for obvious aliasing: a variable being assigned
|
|
|
|
|
// during the all list and appearing in n.
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
// Also record whether there are any writes to addressable
|
|
|
|
|
// memory (either main memory or variables whose addresses
|
|
|
|
|
// have been taken).
|
2017-10-24 17:15:30 +01:00
|
|
|
memwrite := false
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
for _, as := range all {
|
|
|
|
|
// We can ignore assignments to blank.
|
|
|
|
|
if as.Left.isBlank() {
|
|
|
|
|
continue
|
2016-03-31 13:58:33 -07:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
l := outervalue(as.Left)
|
|
|
|
|
if l.Op != ONAME {
|
2017-10-24 17:15:30 +01:00
|
|
|
memwrite = true
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
switch l.Class() {
|
2015-02-13 14:40:36 -05:00
|
|
|
default:
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
Fatalf("unexpected class: %v, %v", l, l.Class())
|
|
|
|
|
|
|
|
|
|
case PAUTOHEAP, PEXTERN:
|
|
|
|
|
memwrite = true
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case PAUTO, PPARAM, PPARAMOUT:
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
if l.Name.Addrtaken() {
|
|
|
|
|
memwrite = true
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
if vmatch2(l, r) {
|
|
|
|
|
// Direct hit: l appears in r.
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
// The variables being written do not appear in r.
|
|
|
|
|
// However, r might refer to computed addresses
|
2015-02-13 14:40:36 -05:00
|
|
|
// that are being written.
|
|
|
|
|
|
|
|
|
|
// If no computed addresses are affected by the writes, no aliasing.
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
if !memwrite {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
// If r does not refer to computed addresses
|
|
|
|
|
// (that is, if r only refers to variables whose addresses
|
2015-02-13 14:40:36 -05:00
|
|
|
// have not been taken), no aliasing.
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
if varexpr(r) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix and improve alias detection
"aliased" is the function responsible for detecting whether we can
turn "a, b = x, y" into just "a = x; b = y", or we need to pre-compute
y and save it in a temporary variable because it might depend on a.
It currently has two issues:
1. It suboptimally treats assignments to blank as writes to heap
memory. Users generally won't write "_, b = x, y" directly, but it
comes up a lot in generated code within the compiler.
This CL changes it to ignore blank assignments.
2. When deciding whether the assigned variable might be referenced by
pointers, it mistakenly checks Class() and Name.Addrtaken() on "n"
(the *value* expression being assigned) rather than "a" (the
destination expression).
It doesn't appear to result in correctness issues (i.e.,
incorrectly reporting no aliasing when there is potential aliasing),
due to all the (overly conservative) rewrite passes before code
reaches here. But it generates unnecessary code and could have
correctness issues if we improve those other passes to be more
aggressive.
This CL fixes the misuse of "n" for "a" by renaming the variables
to "r" and "l", respectively, to make their meaning clearer.
Improving these two cases shaves 4.6kB of text from cmd/go, and 93kB
from k8s.io/kubernetes/cmd/kubelet:
text data bss dec hex filename
9732136 290072 231552 10253760 9c75c0 go.before
9727542 290072 231552 10249166 9c63ce go.after
97977637 1007051 301344 99286032 5eafc10 kubelet.before
97884549 1007051 301344 99192944 5e99070 kubelet.after
While here, this CL also collapses "memwrite" and "varwrite" into a
single variable. Logically, they're detecting the same thing: are we
assigning to a memory location that a pointer might alias. There's no
need for two variables.
Updates #6853.
Updates #23017.
Change-Id: I5a307b8e20bcd2196e85c55eb025d3f01e303008
Reviewed-on: https://go-review.googlesource.com/c/go/+/261677
Trust: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-10-12 14:02:36 -07:00
|
|
|
// Otherwise, both the writes and r refer to computed memory addresses.
|
2015-02-13 14:40:36 -05:00
|
|
|
// Assume that they might conflict.
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// does the evaluation of n only refer to variables
|
|
|
|
|
// whose addresses have not been taken?
|
|
|
|
|
// (and no other memory)
|
2015-02-17 22:13:49 -05:00
|
|
|
func varexpr(n *Node) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n == nil {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
2020-11-13 20:38:21 -08:00
|
|
|
case OLITERAL, ONIL:
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ONAME:
|
2017-04-25 18:14:12 -07:00
|
|
|
switch n.Class() {
|
2015-04-01 09:38:44 -07:00
|
|
|
case PAUTO, PPARAM, PPARAMOUT:
|
cmd/compile: move some ONAME-specific flags from Node to Name
The IsClosureVar, IsOutputParamHeapAddr, Assigned, Addrtaken,
InlFormal, and InlLocal flags are only interesting for ONAME nodes, so
it's better to set these flags on Name.flags instead of Node.flags.
Two caveats though:
1. Previously, we would set Assigned and Addrtaken on the entire
expression tree involved in an assignment or addressing operation.
However, the rest of the compiler only actually cares about knowing
whether the underlying ONAME (if any) was assigned/addressed.
2. This actually requires bumping Name.flags from bitset8 to bitset16,
whereas it doesn't allow shrinking Node.flags any. However, Name has
some trailing padding bytes, so expanding Name.flags doesn't cost any
memory.
Passes toolstash-check.
Change-Id: I7775d713566a38d5b9723360b1659b79391744c2
Reviewed-on: https://go-review.googlesource.com/c/go/+/200898
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-10-12 16:21:55 -07:00
|
|
|
if !n.Name.Addrtaken() {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OADD,
|
|
|
|
|
OSUB,
|
|
|
|
|
OOR,
|
|
|
|
|
OXOR,
|
|
|
|
|
OMUL,
|
|
|
|
|
ODIV,
|
|
|
|
|
OMOD,
|
|
|
|
|
OLSH,
|
|
|
|
|
ORSH,
|
|
|
|
|
OAND,
|
|
|
|
|
OANDNOT,
|
|
|
|
|
OPLUS,
|
2018-11-18 08:34:38 -08:00
|
|
|
ONEG,
|
|
|
|
|
OBITNOT,
|
2015-02-13 14:40:36 -05:00
|
|
|
OPAREN,
|
|
|
|
|
OANDAND,
|
|
|
|
|
OOROR,
|
|
|
|
|
OCONV,
|
|
|
|
|
OCONVNOP,
|
|
|
|
|
OCONVIFACE,
|
|
|
|
|
ODOTTYPE:
|
2015-02-17 22:13:49 -05:00
|
|
|
return varexpr(n.Left) && varexpr(n.Right)
|
cmd/compile: change ODOT and friends to use Sym, not Right
The Node type ODOT and its variants all represent a selector, with a
simple name to the right of the dot. Before this change this was
represented by using an ONAME Node in the Right field. This ONAME node
served no useful purpose. This CL changes these Node types to store the
symbol in the Sym field instead, thus not requiring allocating a Node
for each selector.
When compiling x/tools/go/types this CL eliminates nearly 5000 calls to
newname and reduces the total number of Nodes allocated by about 6.6%.
It seems to cut compilation time by 1 to 2 percent.
Getting this right was somewhat subtle, and I added two dubious changes
to produce the exact same output as before. One is to ishairy in
inl.go: the ONAME node increased the cost of ODOT and friends by 1, and
I retained that, although really ODOT is not more expensive than any
other node. The other is to varexpr in walk.go: because the ONAME in
the Right field of an ODOT has no class, varexpr would always return
false for an ODOT, although in fact for some ODOT's it seemingly ought
to return true; I added an && false for now. I will send separate CLs,
that will break toolstash -cmp, to clean these up.
This CL passes toolstash -cmp.
Change-Id: I4af8a10cc59078c436130ce472f25abc3a9b2f80
Reviewed-on: https://go-review.googlesource.com/20890
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-03-18 16:52:30 -07:00
|
|
|
|
|
|
|
|
case ODOT: // but not ODOTPTR
|
2016-03-31 13:58:33 -07:00
|
|
|
// Should have been handled in aliased.
|
|
|
|
|
Fatalf("varexpr unexpected ODOT")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Be conservative.
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// is the name l mentioned in r?
|
2015-02-17 22:13:49 -05:00
|
|
|
func vmatch2(l *Node, r *Node) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if r == nil {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
switch r.Op {
|
|
|
|
|
// match each right given left
|
|
|
|
|
case ONAME:
|
2015-02-17 22:13:49 -05:00
|
|
|
return l == r
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-11-13 20:38:21 -08:00
|
|
|
case OLITERAL, ONIL:
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if vmatch2(l, r.Left) {
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
if vmatch2(l, r.Right) {
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n := range r.List.Slice() {
|
|
|
|
|
if vmatch2(l, n) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// is any name mentioned in l also mentioned in r?
|
|
|
|
|
// called by sinit.go
|
2015-02-17 22:13:49 -05:00
|
|
|
func vmatch1(l *Node, r *Node) bool {
|
2015-10-22 09:51:12 +09:00
|
|
|
// isolate all left sides
|
2015-02-13 14:40:36 -05:00
|
|
|
if l == nil || r == nil {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
switch l.Op {
|
|
|
|
|
case ONAME:
|
2017-04-25 18:14:12 -07:00
|
|
|
switch l.Class() {
|
2016-05-25 10:29:50 -04:00
|
|
|
case PPARAM, PAUTO:
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
|
|
|
|
|
default:
|
2017-03-03 13:38:49 -08:00
|
|
|
// assignment to non-stack variable must be
|
|
|
|
|
// delayed if right has function calls.
|
|
|
|
|
if r.HasCall() {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return vmatch2(l, r)
|
|
|
|
|
|
2020-11-13 20:38:21 -08:00
|
|
|
case OLITERAL, ONIL:
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
if vmatch1(l.Left, r) {
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
if vmatch1(l.Right, r) {
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n := range l.List.Slice() {
|
|
|
|
|
if vmatch1(n, r) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-09 18:54:26 -08:00
|
|
|
// paramstoheap returns code to allocate memory for heap-escaped parameters
|
2017-04-27 09:27:52 -07:00
|
|
|
// and to copy non-result parameters' values from the stack.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func paramstoheap(params *types.Type) []*Node {
|
2016-02-26 14:28:48 -08:00
|
|
|
var nn []*Node
|
2016-03-17 01:32:18 -07:00
|
|
|
for _, t := range params.Fields().Slice() {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
v := asNode(t.Nname)
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if v != nil && v.Sym != nil && strings.HasPrefix(v.Sym.Name, "~r") { // unnamed result
|
|
|
|
|
v = nil
|
|
|
|
|
}
|
|
|
|
|
if v == nil {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if stackcopy := v.Name.Param.Stackcopy; stackcopy != nil {
|
2016-09-16 11:00:54 +10:00
|
|
|
nn = append(nn, walkstmt(nod(ODCL, v, nil)))
|
2017-04-25 18:14:12 -07:00
|
|
|
if stackcopy.Class() == PPARAM {
|
2018-11-18 08:34:38 -08:00
|
|
|
nn = append(nn, walkstmt(typecheck(nod(OAS, v, stackcopy), ctxStmt)))
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nn
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 10:38:45 -08:00
|
|
|
// zeroResults zeros the return values at the start of the function.
|
|
|
|
|
// We need to do this very early in the function. Defer might stop a
|
|
|
|
|
// panic and show the return values as they exist at the time of
|
|
|
|
|
// panic. For precise stacks, the garbage collector assumes results
|
|
|
|
|
// are always live, so we need to zero them before any allocations,
|
|
|
|
|
// even allocations to move params/results to the heap.
|
|
|
|
|
// The generated code is added to Curfn's Enter list.
|
|
|
|
|
func zeroResults() {
|
|
|
|
|
for _, f := range Curfn.Type.Results().Fields().Slice() {
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
v := asNode(f.Nname)
|
|
|
|
|
if v != nil && v.Name.Param.Heapaddr != nil {
|
2017-03-09 10:38:45 -08:00
|
|
|
// The local which points to the return value is the
|
|
|
|
|
// thing that needs zeroing. This is already handled
|
|
|
|
|
// by a Needzero annotation in plive.go:livenessepilogue.
|
|
|
|
|
continue
|
|
|
|
|
}
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
if v.isParamHeapCopy() {
|
|
|
|
|
// TODO(josharian/khr): Investigate whether we can switch to "continue" here,
|
|
|
|
|
// and document more in either case.
|
|
|
|
|
// In the review of CL 114797, Keith wrote (roughly):
|
|
|
|
|
// I don't think the zeroing below matters.
|
|
|
|
|
// The stack return value will never be marked as live anywhere in the function.
|
|
|
|
|
// It is not written to until deferreturn returns.
|
|
|
|
|
v = v.Name.Param.Stackcopy
|
|
|
|
|
}
|
2017-03-09 10:38:45 -08:00
|
|
|
// Zero the stack location containing f.
|
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
|
|
|
Curfn.Func.Enter.Append(nodl(Curfn.Pos, OAS, v, nil))
|
2017-03-09 10:38:45 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-09 18:54:26 -08:00
|
|
|
// returnsfromheap returns code to copy values for heap-escaped parameters
|
|
|
|
|
// back to the stack.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func returnsfromheap(params *types.Type) []*Node {
|
2016-02-26 14:28:48 -08:00
|
|
|
var nn []*Node
|
2016-03-17 01:32:18 -07:00
|
|
|
for _, t := range params.Fields().Slice() {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
v := asNode(t.Nname)
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
if v == nil {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
2017-04-25 18:14:12 -07:00
|
|
|
if stackcopy := v.Name.Param.Stackcopy; stackcopy != nil && stackcopy.Class() == PPARAMOUT {
|
2018-11-18 08:34:38 -08:00
|
|
|
nn = append(nn, walkstmt(typecheck(nod(OAS, stackcopy, v), ctxStmt)))
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nn
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-09 18:54:26 -08:00
|
|
|
// heapmoves generates code to handle migrating heap-escaped parameters
|
|
|
|
|
// between the stack and the heap. The generated code is added to Curfn's
|
|
|
|
|
// Enter and Exit lists.
|
2015-02-13 14:40:36 -05:00
|
|
|
func heapmoves() {
|
2015-02-23 16:07:24 -05:00
|
|
|
lno := lineno
|
2016-12-07 17:40:46 -08:00
|
|
|
lineno = Curfn.Pos
|
2016-05-25 10:01:58 -04:00
|
|
|
nn := paramstoheap(Curfn.Type.Recvs())
|
|
|
|
|
nn = append(nn, paramstoheap(Curfn.Type.Params())...)
|
|
|
|
|
nn = append(nn, paramstoheap(Curfn.Type.Results())...)
|
2016-02-26 14:28:48 -08:00
|
|
|
Curfn.Func.Enter.Append(nn...)
|
2015-03-25 19:33:01 -07:00
|
|
|
lineno = Curfn.Func.Endlineno
|
2016-03-09 18:54:26 -08:00
|
|
|
Curfn.Func.Exit.Append(returnsfromheap(Curfn.Type.Results())...)
|
2015-02-13 14:40:36 -05:00
|
|
|
lineno = lno
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func vmkcall(fn *Node, t *types.Type, init *Nodes, va []*Node) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
if fn.Type == nil || fn.Type.Etype != TFUNC {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("mkcall %v %v", fn, fn.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-05-02 09:16:22 -07:00
|
|
|
n := fn.Type.NumParams()
|
2017-08-18 18:40:12 +02:00
|
|
|
if n != len(va) {
|
|
|
|
|
Fatalf("vmkcall %v needs %v args got %v", fn, n, len(va))
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
r := nod(OCALL, fn, nil)
|
2017-08-18 18:40:12 +02:00
|
|
|
r.List.Set(va)
|
2017-05-02 09:16:22 -07:00
|
|
|
if fn.Type.NumResults() > 0 {
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxExpr|ctxMultiOK)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxStmt)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = walkexpr(r, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
r.Type = t
|
|
|
|
|
return r
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func mkcall(name string, t *types.Type, init *Nodes, args ...*Node) *Node {
|
2016-03-04 15:19:06 -08:00
|
|
|
return vmkcall(syslook(name), t, init, args)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func mkcall1(fn *Node, t *types.Type, init *Nodes, args ...*Node) *Node {
|
2015-02-13 14:40:36 -05:00
|
|
|
return vmkcall(fn, t, init, args)
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func conv(n *Node, t *types.Type) *Node {
|
2018-10-18 15:24:50 -07:00
|
|
|
if types.Identical(n.Type, t) {
|
2015-02-13 14:40:36 -05:00
|
|
|
return n
|
|
|
|
|
}
|
2016-09-16 11:00:54 +10:00
|
|
|
n = nod(OCONV, n, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Type = t
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxExpr)
|
2015-02-13 14:40:36 -05:00
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-10 13:25:39 +02:00
|
|
|
// convnop converts node n to type t using the OCONVNOP op
|
2018-11-18 08:34:38 -08:00
|
|
|
// and typechecks the result with ctxExpr.
|
2018-05-10 13:25:39 +02:00
|
|
|
func convnop(n *Node, t *types.Type) *Node {
|
2019-10-17 13:16:15 -07:00
|
|
|
if types.Identical(n.Type, t) {
|
|
|
|
|
return n
|
|
|
|
|
}
|
2018-05-10 13:25:39 +02:00
|
|
|
n = nod(OCONVNOP, n, nil)
|
|
|
|
|
n.Type = t
|
2018-11-18 08:34:38 -08:00
|
|
|
n = typecheck(n, ctxExpr)
|
2018-05-10 13:25:39 +02:00
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-03 21:03:40 +00:00
|
|
|
// byteindex converts n, which is byte-sized, to an int used to index into an array.
|
|
|
|
|
// We cannot use conv, because we allow converting bool to int here,
|
2017-01-21 19:52:09 -08:00
|
|
|
// which is forbidden in user code.
|
|
|
|
|
func byteindex(n *Node) *Node {
|
2020-03-03 21:03:40 +00:00
|
|
|
// We cannot convert from bool to int directly.
|
|
|
|
|
// While converting from int8 to int is possible, it would yield
|
|
|
|
|
// the wrong result for negative values.
|
|
|
|
|
// Reinterpreting the value as an unsigned byte solves both cases.
|
|
|
|
|
if !types.Identical(n.Type, types.Types[TUINT8]) {
|
|
|
|
|
n = nod(OCONV, n, nil)
|
|
|
|
|
n.Type = types.Types[TUINT8]
|
|
|
|
|
n.SetTypecheck(1)
|
2017-01-21 19:52:09 -08:00
|
|
|
}
|
|
|
|
|
n = nod(OCONV, n, nil)
|
2020-03-03 21:03:40 +00:00
|
|
|
n.Type = types.Types[TINT]
|
2017-04-25 18:02:43 -07:00
|
|
|
n.SetTypecheck(1)
|
2017-01-21 19:52:09 -08:00
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func chanfn(name string, n int, t *types.Type) *Node {
|
2016-03-30 14:56:08 -07:00
|
|
|
if !t.IsChan() {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("chanfn %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook(name)
|
2015-03-08 13:33:49 -04:00
|
|
|
switch n {
|
|
|
|
|
default:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("chanfn %d", n)
|
2015-03-08 13:33:49 -04:00
|
|
|
case 1:
|
2016-03-30 10:57:47 -07:00
|
|
|
fn = substArgTypes(fn, t.Elem())
|
2015-03-08 13:33:49 -04:00
|
|
|
case 2:
|
2016-03-30 10:57:47 -07:00
|
|
|
fn = substArgTypes(fn, t.Elem(), t.Elem())
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
return fn
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func mapfn(name string, t *types.Type) *Node {
|
2016-03-30 14:56:08 -07:00
|
|
|
if !t.IsMap() {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("mapfn %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook(name)
|
2018-04-24 13:53:35 -07:00
|
|
|
fn = substArgTypes(fn, t.Key(), t.Elem(), t.Key(), t.Elem())
|
2015-02-13 14:40:36 -05:00
|
|
|
return fn
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func mapfndel(name string, t *types.Type) *Node {
|
2016-03-30 14:56:08 -07:00
|
|
|
if !t.IsMap() {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("mapfn %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook(name)
|
2018-04-24 13:53:35 -07:00
|
|
|
fn = substArgTypes(fn, t.Key(), t.Elem(), t.Key())
|
2015-02-13 14:40:36 -05:00
|
|
|
return fn
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-14 11:11:28 -07:00
|
|
|
const (
|
|
|
|
|
mapslow = iota
|
|
|
|
|
mapfast32
|
2017-11-21 07:14:11 -08:00
|
|
|
mapfast32ptr
|
2017-03-14 11:11:28 -07:00
|
|
|
mapfast64
|
2017-11-21 07:14:11 -08:00
|
|
|
mapfast64ptr
|
2017-03-14 11:11:28 -07:00
|
|
|
mapfaststr
|
|
|
|
|
nmapfast
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type mapnames [nmapfast]string
|
|
|
|
|
|
2017-11-21 07:14:11 -08:00
|
|
|
func mkmapnames(base string, ptr string) mapnames {
|
|
|
|
|
return mapnames{base, base + "_fast32", base + "_fast32" + ptr, base + "_fast64", base + "_fast64" + ptr, base + "_faststr"}
|
2017-02-09 14:00:23 -08:00
|
|
|
}
|
|
|
|
|
|
2017-11-21 07:14:11 -08:00
|
|
|
var mapaccess1 = mkmapnames("mapaccess1", "")
|
|
|
|
|
var mapaccess2 = mkmapnames("mapaccess2", "")
|
|
|
|
|
var mapassign = mkmapnames("mapassign", "ptr")
|
|
|
|
|
var mapdelete = mkmapnames("mapdelete", "")
|
2017-03-14 11:11:28 -07:00
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func mapfast(t *types.Type) int {
|
2019-04-22 13:37:08 -07:00
|
|
|
// Check runtime/map.go:maxElemSize before changing.
|
2018-04-24 13:53:35 -07:00
|
|
|
if t.Elem().Width > 128 {
|
2017-03-14 11:11:28 -07:00
|
|
|
return mapslow
|
2017-03-12 14:47:59 -07:00
|
|
|
}
|
|
|
|
|
switch algtype(t.Key()) {
|
|
|
|
|
case AMEM32:
|
2020-08-21 20:20:12 -07:00
|
|
|
if !t.Key().HasPointers() {
|
2017-11-21 07:14:11 -08:00
|
|
|
return mapfast32
|
|
|
|
|
}
|
|
|
|
|
if Widthptr == 4 {
|
|
|
|
|
return mapfast32ptr
|
|
|
|
|
}
|
|
|
|
|
Fatalf("small pointer %v", t.Key())
|
2017-03-12 14:47:59 -07:00
|
|
|
case AMEM64:
|
2020-08-21 20:20:12 -07:00
|
|
|
if !t.Key().HasPointers() {
|
2017-11-21 07:14:11 -08:00
|
|
|
return mapfast64
|
|
|
|
|
}
|
|
|
|
|
if Widthptr == 8 {
|
|
|
|
|
return mapfast64ptr
|
|
|
|
|
}
|
|
|
|
|
// Two-word object, at least one of which is a pointer.
|
|
|
|
|
// Use the slow path.
|
2017-03-12 14:47:59 -07:00
|
|
|
case ASTRING:
|
2017-03-14 11:11:28 -07:00
|
|
|
return mapfaststr
|
2017-03-12 14:47:59 -07:00
|
|
|
}
|
2017-03-14 11:11:28 -07:00
|
|
|
return mapslow
|
2017-03-12 14:47:59 -07:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func writebarrierfn(name string, l *types.Type, r *types.Type) *Node {
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook(name)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
fn = substArgTypes(fn, l, r)
|
2015-02-13 14:40:36 -05:00
|
|
|
return fn
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 22:54:46 -08:00
|
|
|
func addstr(n *Node, init *Nodes) *Node {
|
2019-11-05 23:56:35 +07:00
|
|
|
// order.expr rewrote OADDSTR to have a list of strings.
|
2016-03-08 15:10:26 -08:00
|
|
|
c := n.List.Len()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
if c < 2 {
|
2017-03-14 09:46:45 -07:00
|
|
|
Fatalf("addstr count %d too small", c)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
buf := nodnil()
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Esc == EscNone {
|
2015-02-23 16:07:24 -05:00
|
|
|
sz := int64(0)
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n1 := range n.List.Slice() {
|
|
|
|
|
if n1.Op == OLITERAL {
|
2020-10-12 15:02:59 +02:00
|
|
|
sz += int64(len(n1.StringVal()))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Don't allocate the buffer if the result won't fit.
|
|
|
|
|
if sz < tmpstringbufsize {
|
|
|
|
|
// Create temporary buffer for result string on stack.
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
|
2016-09-16 11:00:54 +10:00
|
|
|
buf = nod(OADDR, temp(t), nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// build list of string arguments
|
2016-03-03 20:48:00 -08:00
|
|
|
args := []*Node{buf}
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n2 := range n.List.Slice() {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
args = append(args, conv(n2, types.Types[TSTRING]))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-03-06 12:02:24 -08:00
|
|
|
var fn string
|
2015-02-13 14:40:36 -05:00
|
|
|
if c <= 5 {
|
|
|
|
|
// small numbers of strings use direct runtime helpers.
|
2019-11-05 23:56:35 +07:00
|
|
|
// note: order.expr knows this cutoff too.
|
2015-03-06 12:02:24 -08:00
|
|
|
fn = fmt.Sprintf("concatstring%d", c)
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
|
|
|
|
// large numbers of strings are passed to the runtime as a slice.
|
2015-03-06 12:02:24 -08:00
|
|
|
fn = "concatstrings"
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
t := types.NewSlice(types.Types[TSTRING])
|
2016-09-16 11:00:54 +10:00
|
|
|
slice := nod(OCOMPLIT, nil, typenod(t))
|
2015-05-26 23:05:35 -04:00
|
|
|
if prealloc[n] != nil {
|
|
|
|
|
prealloc[slice] = prealloc[n]
|
|
|
|
|
}
|
2016-03-08 15:10:26 -08:00
|
|
|
slice.List.Set(args[1:]) // skip buf arg
|
2016-04-23 22:59:01 -07:00
|
|
|
args = []*Node{buf, slice}
|
2015-02-13 14:40:36 -05:00
|
|
|
slice.Esc = EscNone
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-04 15:19:06 -08:00
|
|
|
cat := syslook(fn)
|
2016-09-16 11:00:54 +10:00
|
|
|
r := nod(OCALL, cat, nil)
|
2016-03-08 15:10:26 -08:00
|
|
|
r.List.Set(args)
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxExpr)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
r = walkexpr(r, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
r.Type = n.Type
|
|
|
|
|
|
|
|
|
|
return r
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-26 18:30:11 +02:00
|
|
|
func walkAppendArgs(n *Node, init *Nodes) {
|
|
|
|
|
walkexprlistsafe(n.List.Slice(), init)
|
|
|
|
|
|
|
|
|
|
// walkexprlistsafe will leave OINDEX (s[n]) alone if both s
|
|
|
|
|
// and n are name or literal, but those may index the slice we're
|
|
|
|
|
// modifying here. Fix explicitly.
|
|
|
|
|
ls := n.List.Slice()
|
|
|
|
|
for i1, n1 := range ls {
|
|
|
|
|
ls[i1] = cheapexpr(n1, init)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// expand append(l1, l2...) to
|
|
|
|
|
// init {
|
|
|
|
|
// s := l1
|
2016-03-13 15:22:45 -07:00
|
|
|
// n := len(s) + len(l2)
|
|
|
|
|
// // Compare as uint so growslice can panic on overflow.
|
|
|
|
|
// if uint(n) > uint(cap(s)) {
|
|
|
|
|
// s = growslice(s, n)
|
2015-02-13 14:40:36 -05:00
|
|
|
// }
|
2016-03-13 15:22:45 -07:00
|
|
|
// s = s[:n]
|
2015-02-13 14:40:36 -05:00
|
|
|
// memmove(&s[len(l1)], &l2[0], len(l2)*sizeof(T))
|
|
|
|
|
// }
|
|
|
|
|
// s
|
|
|
|
|
//
|
|
|
|
|
// l2 is allowed to be a string.
|
2016-03-07 22:54:46 -08:00
|
|
|
func appendslice(n *Node, init *Nodes) *Node {
|
2018-04-26 18:30:11 +02:00
|
|
|
walkAppendArgs(n, init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
l1 := n.List.First()
|
|
|
|
|
l2 := n.List.Second()
|
2020-01-31 21:01:55 -08:00
|
|
|
l2 = cheapexpr(l2, init)
|
|
|
|
|
n.List.SetSecond(l2)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-05-10 09:28:04 +02:00
|
|
|
var nodes Nodes
|
2016-03-13 15:22:45 -07:00
|
|
|
|
|
|
|
|
// var s []T
|
|
|
|
|
s := temp(l1.Type)
|
2018-05-10 09:28:04 +02:00
|
|
|
nodes.Append(nod(OAS, s, l1)) // s = l1
|
|
|
|
|
|
|
|
|
|
elemtype := s.Type.Elem()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-13 15:22:45 -07:00
|
|
|
// n := len(s) + len(l2)
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
nn := temp(types.Types[TINT])
|
2018-05-10 09:28:04 +02:00
|
|
|
nodes.Append(nod(OAS, nn, nod(OADD, nod(OLEN, s, nil), nod(OLEN, l2, nil))))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-13 15:22:45 -07:00
|
|
|
// if uint(n) > uint(cap(s))
|
2016-09-16 11:00:54 +10:00
|
|
|
nif := nod(OIF, nil, nil)
|
2018-05-10 09:28:04 +02:00
|
|
|
nuint := conv(nn, types.Types[TUINT])
|
|
|
|
|
scapuint := conv(nod(OCAP, s, nil), types.Types[TUINT])
|
|
|
|
|
nif.Left = nod(OGT, nuint, scapuint)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-05-10 09:28:04 +02:00
|
|
|
// instantiate growslice(typ *type, []any, int) []any
|
2016-03-13 15:22:45 -07:00
|
|
|
fn := syslook("growslice")
|
2018-05-10 09:28:04 +02:00
|
|
|
fn = substArgTypes(fn, elemtype, elemtype)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-13 15:22:45 -07:00
|
|
|
// s = growslice(T, s, n)
|
2018-05-10 09:28:04 +02:00
|
|
|
nif.Nbody.Set1(nod(OAS, s, mkcall1(fn, s.Type, &nif.Ninit, typename(elemtype), s, nn)))
|
|
|
|
|
nodes.Append(nif)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-13 15:22:45 -07:00
|
|
|
// s = s[:n]
|
2016-09-16 11:00:54 +10:00
|
|
|
nt := nod(OSLICE, s, nil)
|
2016-04-21 11:55:33 -07:00
|
|
|
nt.SetSliceBounds(nil, nn, nil)
|
2018-06-16 01:22:07 +02:00
|
|
|
nt.SetBounded(true)
|
2018-05-10 09:28:04 +02:00
|
|
|
nodes.Append(nod(OAS, s, nt))
|
2016-03-13 15:22:45 -07:00
|
|
|
|
2018-05-10 09:28:04 +02:00
|
|
|
var ncopy *Node
|
2020-08-21 20:20:12 -07:00
|
|
|
if elemtype.HasPointers() {
|
2016-03-13 15:22:45 -07:00
|
|
|
// copy(s[len(l1):], l2)
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr1 := nod(OSLICE, s, nil)
|
2020-01-31 21:01:55 -08:00
|
|
|
nptr1.Type = s.Type
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil)
|
2020-01-31 21:01:55 -08:00
|
|
|
nptr1 = cheapexpr(nptr1, &nodes)
|
2018-05-10 09:28:04 +02:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
nptr2 := l2
|
2018-05-10 09:28:04 +02:00
|
|
|
|
2017-10-24 17:10:02 -04:00
|
|
|
Curfn.Func.setWBPos(n.Pos)
|
2018-05-10 09:28:04 +02:00
|
|
|
|
2020-01-31 21:01:55 -08:00
|
|
|
// instantiate typedslicecopy(typ *type, dstPtr *any, dstLen int, srcPtr *any, srcLen int) int
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook("typedslicecopy")
|
2020-01-31 21:01:55 -08:00
|
|
|
fn = substArgTypes(fn, l1.Type.Elem(), l2.Type.Elem())
|
2020-09-14 16:30:43 +02:00
|
|
|
ptr1, len1 := nptr1.backingArrayPtrLen()
|
|
|
|
|
ptr2, len2 := nptr2.backingArrayPtrLen()
|
2020-01-31 21:01:55 -08:00
|
|
|
ncopy = mkcall1(fn, types.Types[TINT], &nodes, typename(elemtype), ptr1, len1, ptr2, len2)
|
2016-09-21 09:44:40 -07:00
|
|
|
} else if instrumenting && !compiling_runtime {
|
2020-09-14 16:30:43 +02:00
|
|
|
// rely on runtime to instrument:
|
|
|
|
|
// copy(s[len(l1):], l2)
|
|
|
|
|
// l2 can be a slice or string.
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr1 := nod(OSLICE, s, nil)
|
2020-01-31 21:01:55 -08:00
|
|
|
nptr1.Type = s.Type
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil)
|
2020-01-31 21:01:55 -08:00
|
|
|
nptr1 = cheapexpr(nptr1, &nodes)
|
2015-02-23 16:07:24 -05:00
|
|
|
nptr2 := l2
|
2017-08-18 18:40:12 +02:00
|
|
|
|
2020-09-14 16:30:43 +02:00
|
|
|
ptr1, len1 := nptr1.backingArrayPtrLen()
|
|
|
|
|
ptr2, len2 := nptr2.backingArrayPtrLen()
|
2017-08-18 18:40:12 +02:00
|
|
|
|
2020-09-14 16:30:43 +02:00
|
|
|
fn := syslook("slicecopy")
|
|
|
|
|
fn = substArgTypes(fn, ptr1.Type.Elem(), ptr2.Type.Elem())
|
|
|
|
|
ncopy = mkcall1(fn, types.Types[TINT], &nodes, ptr1, len1, ptr2, len2, nodintconst(elemtype.Width))
|
2015-02-13 14:40:36 -05:00
|
|
|
} else {
|
|
|
|
|
// memmove(&s[len(l1)], &l2[0], len(l2)*sizeof(T))
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr1 := nod(OINDEX, s, nod(OLEN, l1, nil))
|
2017-02-27 19:56:38 +02:00
|
|
|
nptr1.SetBounded(true)
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr1 = nod(OADDR, nptr1, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
nptr2 := nod(OSPTR, l2, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-05-10 09:28:04 +02:00
|
|
|
nwid := cheapexpr(conv(nod(OLEN, l2, nil), types.Types[TUINTPTR]), &nodes)
|
|
|
|
|
nwid = nod(OMUL, nwid, nodintconst(elemtype.Width))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-05-10 09:28:04 +02:00
|
|
|
// instantiate func memmove(to *any, frm *any, length uintptr)
|
|
|
|
|
fn := syslook("memmove")
|
|
|
|
|
fn = substArgTypes(fn, elemtype, elemtype)
|
|
|
|
|
ncopy = mkcall1(fn, nil, &nodes, nptr1, nptr2, nwid)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-05-10 09:28:04 +02:00
|
|
|
ln := append(nodes.Slice(), ncopy)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
typecheckslice(ln, ctxStmt)
|
2018-05-10 09:28:04 +02:00
|
|
|
walkstmtlist(ln)
|
|
|
|
|
init.Append(ln...)
|
2015-02-13 14:40:36 -05:00
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-26 18:30:11 +02:00
|
|
|
// isAppendOfMake reports whether n is of the form append(x , make([]T, y)...).
|
|
|
|
|
// isAppendOfMake assumes n has already been typechecked.
|
|
|
|
|
func isAppendOfMake(n *Node) bool {
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.N != 0 || instrumenting {
|
2018-04-26 18:30:11 +02:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Typecheck() == 0 {
|
|
|
|
|
Fatalf("missing typecheck: %+v", n)
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
if n.Op != OAPPEND || !n.IsDDD() || n.List.Len() != 2 {
|
2018-04-26 18:30:11 +02:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
second := n.List.Second()
|
2018-05-06 20:18:52 +02:00
|
|
|
if second.Op != OMAKESLICE || second.Right != nil {
|
2018-04-26 18:30:11 +02:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-17 01:15:53 +07:00
|
|
|
// y must be either an integer constant or the largest possible positive value
|
|
|
|
|
// of variable y needs to fit into an uint.
|
|
|
|
|
|
|
|
|
|
// typecheck made sure that constant arguments to make are not negative and fit into an int.
|
|
|
|
|
|
|
|
|
|
// The care of overflow of the len argument to make will be handled by an explicit check of int(len) < 0 during runtime.
|
2018-04-26 18:30:11 +02:00
|
|
|
y := second.Left
|
2020-11-23 13:42:43 -08:00
|
|
|
if !Isconst(y, constant.Int) && y.Type.Size() > types.Types[TUINT].Size() {
|
2018-04-26 18:30:11 +02:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// extendslice rewrites append(l1, make([]T, l2)...) to
|
|
|
|
|
// init {
|
2019-09-13 09:58:35 +07:00
|
|
|
// if l2 >= 0 { // Empty if block here for more meaningful node.SetLikely(true)
|
|
|
|
|
// } else {
|
2018-04-26 18:30:11 +02:00
|
|
|
// panicmakeslicelen()
|
|
|
|
|
// }
|
|
|
|
|
// s := l1
|
|
|
|
|
// n := len(s) + l2
|
|
|
|
|
// // Compare n and s as uint so growslice can panic on overflow of len(s) + l2.
|
|
|
|
|
// // cap is a positive int and n can become negative when len(s) + l2
|
|
|
|
|
// // overflows int. Interpreting n when negative as uint makes it larger
|
|
|
|
|
// // than cap(s). growslice will check the int n arg and panic if n is
|
|
|
|
|
// // negative. This prevents the overflow from being undetected.
|
|
|
|
|
// if uint(n) > uint(cap(s)) {
|
|
|
|
|
// s = growslice(T, s, n)
|
|
|
|
|
// }
|
|
|
|
|
// s = s[:n]
|
|
|
|
|
// lptr := &l1[0]
|
|
|
|
|
// sptr := &s[0]
|
2020-08-22 14:07:30 -07:00
|
|
|
// if lptr == sptr || !T.HasPointers() {
|
2018-04-26 18:30:11 +02:00
|
|
|
// // growslice did not clear the whole underlying array (or did not get called)
|
|
|
|
|
// hp := &s[len(l1)]
|
|
|
|
|
// hn := l2 * sizeof(T)
|
|
|
|
|
// memclr(hp, hn)
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
// s
|
|
|
|
|
func extendslice(n *Node, init *Nodes) *Node {
|
2019-06-17 01:15:53 +07:00
|
|
|
// isAppendOfMake made sure all possible positive values of l2 fit into an uint.
|
|
|
|
|
// The case of l2 overflow when converting from e.g. uint to int is handled by an explicit
|
|
|
|
|
// check of l2 < 0 at runtime which is generated below.
|
2018-04-26 18:30:11 +02:00
|
|
|
l2 := conv(n.List.Second().Left, types.Types[TINT])
|
2018-11-18 08:34:38 -08:00
|
|
|
l2 = typecheck(l2, ctxExpr)
|
2018-04-26 18:30:11 +02:00
|
|
|
n.List.SetSecond(l2) // walkAppendArgs expects l2 in n.List.Second().
|
|
|
|
|
|
|
|
|
|
walkAppendArgs(n, init)
|
|
|
|
|
|
|
|
|
|
l1 := n.List.First()
|
|
|
|
|
l2 = n.List.Second() // re-read l2, as it may have been updated by walkAppendArgs
|
|
|
|
|
|
|
|
|
|
var nodes []*Node
|
|
|
|
|
|
2019-09-13 09:58:35 +07:00
|
|
|
// if l2 >= 0 (likely happens), do nothing
|
|
|
|
|
nifneg := nod(OIF, nod(OGE, l2, nodintconst(0)), nil)
|
|
|
|
|
nifneg.SetLikely(true)
|
2018-04-26 18:30:11 +02:00
|
|
|
|
2019-09-13 09:58:35 +07:00
|
|
|
// else panicmakeslicelen()
|
|
|
|
|
nifneg.Rlist.Set1(mkcall("panicmakeslicelen", nil, init))
|
2018-04-26 18:30:11 +02:00
|
|
|
nodes = append(nodes, nifneg)
|
|
|
|
|
|
|
|
|
|
// s := l1
|
|
|
|
|
s := temp(l1.Type)
|
|
|
|
|
nodes = append(nodes, nod(OAS, s, l1))
|
|
|
|
|
|
|
|
|
|
elemtype := s.Type.Elem()
|
|
|
|
|
|
|
|
|
|
// n := len(s) + l2
|
|
|
|
|
nn := temp(types.Types[TINT])
|
|
|
|
|
nodes = append(nodes, nod(OAS, nn, nod(OADD, nod(OLEN, s, nil), l2)))
|
|
|
|
|
|
|
|
|
|
// if uint(n) > uint(cap(s))
|
2018-05-06 20:18:52 +02:00
|
|
|
nuint := conv(nn, types.Types[TUINT])
|
|
|
|
|
capuint := conv(nod(OCAP, s, nil), types.Types[TUINT])
|
2018-04-26 18:30:11 +02:00
|
|
|
nif := nod(OIF, nod(OGT, nuint, capuint), nil)
|
|
|
|
|
|
|
|
|
|
// instantiate growslice(typ *type, old []any, newcap int) []any
|
|
|
|
|
fn := syslook("growslice")
|
|
|
|
|
fn = substArgTypes(fn, elemtype, elemtype)
|
|
|
|
|
|
|
|
|
|
// s = growslice(T, s, n)
|
|
|
|
|
nif.Nbody.Set1(nod(OAS, s, mkcall1(fn, s.Type, &nif.Ninit, typename(elemtype), s, nn)))
|
|
|
|
|
nodes = append(nodes, nif)
|
|
|
|
|
|
|
|
|
|
// s = s[:n]
|
|
|
|
|
nt := nod(OSLICE, s, nil)
|
|
|
|
|
nt.SetSliceBounds(nil, nn, nil)
|
2018-06-16 01:22:07 +02:00
|
|
|
nt.SetBounded(true)
|
2018-04-26 18:30:11 +02:00
|
|
|
nodes = append(nodes, nod(OAS, s, nt))
|
|
|
|
|
|
|
|
|
|
// lptr := &l1[0]
|
|
|
|
|
l1ptr := temp(l1.Type.Elem().PtrTo())
|
|
|
|
|
tmp := nod(OSPTR, l1, nil)
|
|
|
|
|
nodes = append(nodes, nod(OAS, l1ptr, tmp))
|
|
|
|
|
|
|
|
|
|
// sptr := &s[0]
|
|
|
|
|
sptr := temp(elemtype.PtrTo())
|
|
|
|
|
tmp = nod(OSPTR, s, nil)
|
|
|
|
|
nodes = append(nodes, nod(OAS, sptr, tmp))
|
|
|
|
|
|
|
|
|
|
// hp := &s[len(l1)]
|
2018-05-10 10:10:36 +02:00
|
|
|
hp := nod(OINDEX, s, nod(OLEN, l1, nil))
|
|
|
|
|
hp.SetBounded(true)
|
|
|
|
|
hp = nod(OADDR, hp, nil)
|
2018-05-10 13:25:39 +02:00
|
|
|
hp = convnop(hp, types.Types[TUNSAFEPTR])
|
2018-04-26 18:30:11 +02:00
|
|
|
|
|
|
|
|
// hn := l2 * sizeof(elem(s))
|
2018-05-10 10:10:36 +02:00
|
|
|
hn := nod(OMUL, l2, nodintconst(elemtype.Width))
|
|
|
|
|
hn = conv(hn, types.Types[TUINTPTR])
|
2018-04-26 18:30:11 +02:00
|
|
|
|
|
|
|
|
clrname := "memclrNoHeapPointers"
|
2020-08-22 14:07:30 -07:00
|
|
|
hasPointers := elemtype.HasPointers()
|
2018-04-26 18:30:11 +02:00
|
|
|
if hasPointers {
|
|
|
|
|
clrname = "memclrHasPointers"
|
2018-12-05 15:21:17 -05:00
|
|
|
Curfn.Func.setWBPos(n.Pos)
|
2018-04-26 18:30:11 +02:00
|
|
|
}
|
2018-05-10 10:10:36 +02:00
|
|
|
|
|
|
|
|
var clr Nodes
|
|
|
|
|
clrfn := mkcall(clrname, nil, &clr, hp, hn)
|
|
|
|
|
clr.Append(clrfn)
|
2018-04-26 18:30:11 +02:00
|
|
|
|
|
|
|
|
if hasPointers {
|
|
|
|
|
// if l1ptr == sptr
|
|
|
|
|
nifclr := nod(OIF, nod(OEQ, l1ptr, sptr), nil)
|
2018-05-10 10:10:36 +02:00
|
|
|
nifclr.Nbody = clr
|
2018-04-26 18:30:11 +02:00
|
|
|
nodes = append(nodes, nifclr)
|
|
|
|
|
} else {
|
2018-05-10 10:10:36 +02:00
|
|
|
nodes = append(nodes, clr.Slice()...)
|
2018-04-26 18:30:11 +02:00
|
|
|
}
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
typecheckslice(nodes, ctxStmt)
|
2018-04-26 18:30:11 +02:00
|
|
|
walkstmtlist(nodes)
|
|
|
|
|
init.Append(nodes...)
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
// Rewrite append(src, x, y, z) so that any side effects in
|
|
|
|
|
// x, y, z (including runtime panics) are evaluated in
|
|
|
|
|
// initialization statements before the append.
|
|
|
|
|
// For normal code generation, stop there and leave the
|
|
|
|
|
// rest to cgen_append.
|
|
|
|
|
//
|
|
|
|
|
// For race detector, expand append(src, a [, b]* ) to
|
2015-02-13 14:40:36 -05:00
|
|
|
//
|
|
|
|
|
// init {
|
|
|
|
|
// s := src
|
|
|
|
|
// const argc = len(args) - 1
|
|
|
|
|
// if cap(s) - len(s) < argc {
|
2015-06-25 19:27:20 -04:00
|
|
|
// s = growslice(s, len(s)+argc)
|
2015-02-13 14:40:36 -05:00
|
|
|
// }
|
|
|
|
|
// n := len(s)
|
|
|
|
|
// s = s[:n+argc]
|
|
|
|
|
// s[n] = a
|
|
|
|
|
// s[n+1] = b
|
|
|
|
|
// ...
|
|
|
|
|
// }
|
|
|
|
|
// s
|
2016-03-07 22:54:46 -08:00
|
|
|
func walkappend(n *Node, init *Nodes, dst *Node) *Node {
|
2016-03-08 15:10:26 -08:00
|
|
|
if !samesafeexpr(dst, n.List.First()) {
|
2017-02-19 15:57:58 +01:00
|
|
|
n.List.SetFirst(safeexpr(n.List.First(), init))
|
|
|
|
|
n.List.SetFirst(walkexpr(n.List.First(), init))
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
}
|
2016-03-07 22:54:46 -08:00
|
|
|
walkexprlistsafe(n.List.Slice()[1:], init)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-10-16 14:31:49 -07:00
|
|
|
nsrc := n.List.First()
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// walkexprlistsafe will leave OINDEX (s[n]) alone if both s
|
|
|
|
|
// and n are name or literal, but those may index the slice we're
|
2016-03-01 23:21:55 +00:00
|
|
|
// modifying here. Fix explicitly.
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
// Using cheapexpr also makes sure that the evaluation
|
|
|
|
|
// of all arguments (and especially any panics) happen
|
|
|
|
|
// before we begin to modify the slice in a visible way.
|
2016-03-09 12:39:36 -08:00
|
|
|
ls := n.List.Slice()[1:]
|
|
|
|
|
for i, n := range ls {
|
2018-10-16 14:31:49 -07:00
|
|
|
n = cheapexpr(n, init)
|
2018-10-22 11:21:38 -07:00
|
|
|
if !types.Identical(n.Type, nsrc.Type.Elem()) {
|
2018-10-16 14:31:49 -07:00
|
|
|
n = assignconv(n, nsrc.Type.Elem(), "append")
|
|
|
|
|
n = walkexpr(n, init)
|
|
|
|
|
}
|
|
|
|
|
ls[i] = n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-08 15:10:26 -08:00
|
|
|
argc := n.List.Len() - 1
|
2015-02-13 14:40:36 -05:00
|
|
|
if argc < 1 {
|
|
|
|
|
return nsrc
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
// General case, with no function calls left as arguments.
|
2015-10-21 07:04:10 -07:00
|
|
|
// Leave for gen, except that instrumentation requires old form.
|
2016-09-21 09:44:40 -07:00
|
|
|
if !instrumenting || compiling_runtime {
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-03 20:48:00 -08:00
|
|
|
var l []*Node
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
ns := temp(nsrc.Type)
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, ns, nsrc)) // s = src
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-15 14:34:20 +10:00
|
|
|
na := nodintconst(int64(argc)) // const argc
|
2016-09-16 11:00:54 +10:00
|
|
|
nx := nod(OIF, nil, nil) // if cap(s) - len(s) < argc
|
|
|
|
|
nx.Left = nod(OLT, nod(OSUB, nod(OCAP, ns, nil), nod(OLEN, ns, nil)), na)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook("growslice") // growslice(<type>, old []T, mincap int) (ret []T)
|
2016-03-30 10:57:47 -07:00
|
|
|
fn = substArgTypes(fn, ns.Type.Elem(), ns.Type.Elem())
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
nx.Nbody.Set1(nod(OAS, ns,
|
2016-04-19 15:38:59 -07:00
|
|
|
mkcall1(fn, ns.Type, &nx.Ninit, typename(ns.Type.Elem()), ns,
|
2016-09-16 11:00:54 +10:00
|
|
|
nod(OADD, nod(OLEN, ns, nil), na))))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-03 20:48:00 -08:00
|
|
|
l = append(l, nx)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
nn := temp(types.Types[TINT])
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, nn, nod(OLEN, ns, nil))) // n = len(s)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
nx = nod(OSLICE, ns, nil) // ...s[:n+argc]
|
|
|
|
|
nx.SetSliceBounds(nil, nod(OADD, nn, na), nil)
|
2018-06-16 01:22:07 +02:00
|
|
|
nx.SetBounded(true)
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, ns, nx)) // s = s[:n+argc]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-03-09 12:39:36 -08:00
|
|
|
ls = n.List.Slice()[1:]
|
|
|
|
|
for i, n := range ls {
|
2016-09-16 11:00:54 +10:00
|
|
|
nx = nod(OINDEX, ns, nn) // s[n] ...
|
2017-02-27 19:56:38 +02:00
|
|
|
nx.SetBounded(true)
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, nx, n)) // s[n] = arg
|
2016-03-09 12:39:36 -08:00
|
|
|
if i+1 < len(ls) {
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, nn, nod(OADD, nn, nodintconst(1)))) // n = n + 1
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
typecheckslice(l, ctxStmt)
|
2015-02-13 14:40:36 -05:00
|
|
|
walkstmtlist(l)
|
2016-03-07 22:54:46 -08:00
|
|
|
init.Append(l...)
|
2015-02-13 14:40:36 -05:00
|
|
|
return ns
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Lower copy(a, b) to a memmove call or a runtime call.
|
|
|
|
|
//
|
|
|
|
|
// init {
|
|
|
|
|
// n := len(a)
|
|
|
|
|
// if n > len(b) { n = len(b) }
|
2018-02-14 19:05:36 -08:00
|
|
|
// if a.ptr != b.ptr { memmove(a.ptr, b.ptr, n*sizeof(elem(a))) }
|
2015-02-13 14:40:36 -05:00
|
|
|
// }
|
|
|
|
|
// n;
|
|
|
|
|
//
|
|
|
|
|
// Also works if b is a string.
|
|
|
|
|
//
|
2016-03-07 22:54:46 -08:00
|
|
|
func copyany(n *Node, init *Nodes, runtimecall bool) *Node {
|
2020-08-21 20:20:12 -07:00
|
|
|
if n.Left.Type.Elem().HasPointers() {
|
2017-10-24 17:10:02 -04:00
|
|
|
Curfn.Func.setWBPos(n.Pos)
|
2020-01-31 21:01:55 -08:00
|
|
|
fn := writebarrierfn("typedslicecopy", n.Left.Type.Elem(), n.Right.Type.Elem())
|
|
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2020-09-14 16:30:43 +02:00
|
|
|
ptrL, lenL := n.Left.backingArrayPtrLen()
|
2020-01-31 21:01:55 -08:00
|
|
|
n.Right = cheapexpr(n.Right, init)
|
2020-09-14 16:30:43 +02:00
|
|
|
ptrR, lenR := n.Right.backingArrayPtrLen()
|
2020-01-31 21:01:55 -08:00
|
|
|
return mkcall1(fn, n.Type, init, typename(n.Left.Type.Elem()), ptrL, lenL, ptrR, lenR)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-10-20 10:00:07 -07:00
|
|
|
if runtimecall {
|
2020-09-14 16:30:43 +02:00
|
|
|
// rely on runtime to instrument:
|
|
|
|
|
// copy(n.Left, n.Right)
|
|
|
|
|
// n.Right can be a slice or string.
|
2017-08-18 18:40:12 +02:00
|
|
|
|
2020-01-31 21:01:55 -08:00
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2020-09-14 16:30:43 +02:00
|
|
|
ptrL, lenL := n.Left.backingArrayPtrLen()
|
2020-01-31 21:01:55 -08:00
|
|
|
n.Right = cheapexpr(n.Right, init)
|
2020-09-14 16:30:43 +02:00
|
|
|
ptrR, lenR := n.Right.backingArrayPtrLen()
|
|
|
|
|
|
|
|
|
|
fn := syslook("slicecopy")
|
|
|
|
|
fn = substArgTypes(fn, ptrL.Type.Elem(), ptrR.Type.Elem())
|
|
|
|
|
|
2020-01-31 21:01:55 -08:00
|
|
|
return mkcall1(fn, n.Type, init, ptrL, lenL, ptrR, lenR, nodintconst(n.Left.Type.Elem().Width))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
n.Right = walkexpr(n.Right, init)
|
2015-02-23 16:07:24 -05:00
|
|
|
nl := temp(n.Left.Type)
|
|
|
|
|
nr := temp(n.Right.Type)
|
2016-03-07 22:54:46 -08:00
|
|
|
var l []*Node
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, nl, n.Left))
|
|
|
|
|
l = append(l, nod(OAS, nr, n.Right))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
nfrm := nod(OSPTR, nr, nil)
|
|
|
|
|
nto := nod(OSPTR, nl, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
nlen := temp(types.Types[TINT])
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// n = len(to)
|
2016-09-16 11:00:54 +10:00
|
|
|
l = append(l, nod(OAS, nlen, nod(OLEN, nl, nil)))
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// if n > len(frm) { n = len(frm) }
|
2016-09-16 11:00:54 +10:00
|
|
|
nif := nod(OIF, nil, nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-16 11:00:54 +10:00
|
|
|
nif.Left = nod(OGT, nlen, nod(OLEN, nr, nil))
|
|
|
|
|
nif.Nbody.Append(nod(OAS, nlen, nod(OLEN, nr, nil)))
|
2016-03-07 22:54:46 -08:00
|
|
|
l = append(l, nif)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-02-14 19:05:36 -08:00
|
|
|
// if to.ptr != frm.ptr { memmove( ... ) }
|
|
|
|
|
ne := nod(OIF, nod(ONE, nto, nfrm), nil)
|
|
|
|
|
ne.SetLikely(true)
|
|
|
|
|
l = append(l, ne)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-02-14 19:05:36 -08:00
|
|
|
fn := syslook("memmove")
|
2016-03-30 10:57:47 -07:00
|
|
|
fn = substArgTypes(fn, nl.Type.Elem(), nl.Type.Elem())
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
nwid := temp(types.Types[TUINTPTR])
|
2018-02-14 19:05:36 -08:00
|
|
|
setwid := nod(OAS, nwid, conv(nlen, types.Types[TUINTPTR]))
|
|
|
|
|
ne.Nbody.Append(setwid)
|
2016-09-16 11:00:54 +10:00
|
|
|
nwid = nod(OMUL, nwid, nodintconst(nl.Type.Elem().Width))
|
2018-02-14 19:05:36 -08:00
|
|
|
call := mkcall1(fn, nil, init, nto, nfrm, nwid)
|
|
|
|
|
ne.Nbody.Append(call)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
typecheckslice(l, ctxStmt)
|
2015-02-13 14:40:36 -05:00
|
|
|
walkstmtlist(l)
|
2016-03-07 22:54:46 -08:00
|
|
|
init.Append(l...)
|
2015-02-13 14:40:36 -05:00
|
|
|
return nlen
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-24 17:15:30 +01:00
|
|
|
func eqfor(t *types.Type) (n *Node, needsize bool) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Should only arrive here with large memory or
|
|
|
|
|
// a struct/array containing a non-memory field/element.
|
|
|
|
|
// Small memory is handled inline, and single non-memory
|
2018-10-10 16:47:47 -07:00
|
|
|
// is handled by walkcompare.
|
2016-04-01 11:22:03 -07:00
|
|
|
switch a, _ := algtype1(t); a {
|
|
|
|
|
case AMEM:
|
2016-03-04 15:19:06 -08:00
|
|
|
n := syslook("memequal")
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
n = substArgTypes(n, t, t)
|
2017-10-24 17:15:30 +01:00
|
|
|
return n, true
|
2016-04-01 11:22:03 -07:00
|
|
|
case ASPECIAL:
|
|
|
|
|
sym := typesymprefix(".eq", t)
|
|
|
|
|
n := newname(sym)
|
2020-08-19 16:05:02 +07:00
|
|
|
setNodeNameFunc(n)
|
2017-04-07 15:39:36 -07:00
|
|
|
n.Type = functype(nil, []*Node{
|
|
|
|
|
anonfield(types.NewPtr(t)),
|
|
|
|
|
anonfield(types.NewPtr(t)),
|
|
|
|
|
}, []*Node{
|
|
|
|
|
anonfield(types.Types[TBOOL]),
|
|
|
|
|
})
|
2017-10-24 17:15:30 +01:00
|
|
|
return n, false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-04-01 11:22:03 -07:00
|
|
|
Fatalf("eqfor %v", t)
|
2017-10-24 17:15:30 +01:00
|
|
|
return nil, false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
// The result of walkcompare MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = walkcompare(n.Left, init)
|
|
|
|
|
func walkcompare(n *Node, init *Nodes) *Node {
|
2020-11-13 20:38:21 -08:00
|
|
|
if n.Left.Type.IsInterface() && n.Right.Type.IsInterface() && n.Left.Op != ONIL && n.Right.Op != ONIL {
|
2018-10-10 16:47:47 -07:00
|
|
|
return walkcompareInterface(n, init)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Left.Type.IsString() && n.Right.Type.IsString() {
|
|
|
|
|
return walkcompareString(n, init)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
n.Left = walkexpr(n.Left, init)
|
|
|
|
|
n.Right = walkexpr(n.Right, init)
|
|
|
|
|
|
2019-05-25 01:27:40 +07:00
|
|
|
// Given mixed interface/concrete comparison,
|
|
|
|
|
// rewrite into types-equal && data-equal.
|
2016-06-06 13:01:48 -07:00
|
|
|
// This is efficient, avoids allocations, and avoids runtime calls.
|
2019-05-25 01:27:40 +07:00
|
|
|
if n.Left.Type.IsInterface() != n.Right.Type.IsInterface() {
|
|
|
|
|
// Preserve side-effects in case of short-circuiting; see #32187.
|
|
|
|
|
l := cheapexpr(n.Left, init)
|
|
|
|
|
r := cheapexpr(n.Right, init)
|
|
|
|
|
// Swap so that l is the interface value and r is the concrete value.
|
|
|
|
|
if n.Right.Type.IsInterface() {
|
|
|
|
|
l, r = r, l
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-06-06 13:01:48 -07:00
|
|
|
// Handle both == and !=.
|
|
|
|
|
eq := n.Op
|
2019-03-05 20:44:29 +00:00
|
|
|
andor := OOROR
|
2016-06-06 13:01:48 -07:00
|
|
|
if eq == OEQ {
|
|
|
|
|
andor = OANDAND
|
|
|
|
|
}
|
|
|
|
|
// Check for types equal.
|
|
|
|
|
// For empty interface, this is:
|
|
|
|
|
// l.tab == type(r)
|
|
|
|
|
// For non-empty interface, this is:
|
|
|
|
|
// l.tab != nil && l.tab._type == type(r)
|
|
|
|
|
var eqtype *Node
|
2016-09-16 11:00:54 +10:00
|
|
|
tab := nod(OITAB, l, nil)
|
2016-06-06 13:01:48 -07:00
|
|
|
rtyp := typename(r.Type)
|
|
|
|
|
if l.Type.IsEmptyInterface() {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
tab.Type = types.NewPtr(types.Types[TUINT8])
|
2017-04-25 18:02:43 -07:00
|
|
|
tab.SetTypecheck(1)
|
2016-09-16 11:00:54 +10:00
|
|
|
eqtype = nod(eq, tab, rtyp)
|
2016-06-06 13:01:48 -07:00
|
|
|
} else {
|
2016-09-16 11:00:54 +10:00
|
|
|
nonnil := nod(brcom(eq), nodnil(), tab)
|
|
|
|
|
match := nod(eq, itabType(tab), rtyp)
|
|
|
|
|
eqtype = nod(andor, nonnil, match)
|
2016-06-06 13:01:48 -07:00
|
|
|
}
|
|
|
|
|
// Check for data equal.
|
2020-04-13 23:29:17 -07:00
|
|
|
eqdata := nod(eq, ifaceData(n.Pos, l, r.Type), r)
|
2016-06-06 13:01:48 -07:00
|
|
|
// Put it all together.
|
2016-09-16 11:00:54 +10:00
|
|
|
expr := nod(andor, eqtype, eqdata)
|
2016-06-06 13:01:48 -07:00
|
|
|
n = finishcompare(n, expr, init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Must be comparison of array or struct.
|
|
|
|
|
// Otherwise back end handles it.
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
// While we're here, decide whether to
|
|
|
|
|
// inline or call an eq alg.
|
2015-03-02 20:34:22 -05:00
|
|
|
t := n.Left.Type
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
var inline bool
|
2017-04-14 13:53:40 -05:00
|
|
|
|
|
|
|
|
maxcmpsize := int64(4)
|
2018-03-26 21:18:27 +01:00
|
|
|
unalignedLoad := canMergeLoads()
|
|
|
|
|
if unalignedLoad {
|
|
|
|
|
// Keep this low enough to generate less code than a function call.
|
|
|
|
|
maxcmpsize = 2 * int64(thearch.LinkArch.RegSize)
|
2017-04-14 13:53:40 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
switch t.Etype {
|
|
|
|
|
default:
|
2019-10-28 15:30:35 -07:00
|
|
|
if Debug_libfuzzer != 0 && t.IsInteger() {
|
|
|
|
|
n.Left = cheapexpr(n.Left, init)
|
|
|
|
|
n.Right = cheapexpr(n.Right, init)
|
|
|
|
|
|
|
|
|
|
// If exactly one comparison operand is
|
|
|
|
|
// constant, invoke the constcmp functions
|
|
|
|
|
// instead, and arrange for the constant
|
|
|
|
|
// operand to be the first argument.
|
|
|
|
|
l, r := n.Left, n.Right
|
|
|
|
|
if r.Op == OLITERAL {
|
|
|
|
|
l, r = r, l
|
|
|
|
|
}
|
|
|
|
|
constcmp := l.Op == OLITERAL && r.Op != OLITERAL
|
|
|
|
|
|
|
|
|
|
var fn string
|
|
|
|
|
var paramType *types.Type
|
|
|
|
|
switch t.Size() {
|
|
|
|
|
case 1:
|
|
|
|
|
fn = "libfuzzerTraceCmp1"
|
|
|
|
|
if constcmp {
|
|
|
|
|
fn = "libfuzzerTraceConstCmp1"
|
|
|
|
|
}
|
|
|
|
|
paramType = types.Types[TUINT8]
|
|
|
|
|
case 2:
|
|
|
|
|
fn = "libfuzzerTraceCmp2"
|
|
|
|
|
if constcmp {
|
|
|
|
|
fn = "libfuzzerTraceConstCmp2"
|
|
|
|
|
}
|
|
|
|
|
paramType = types.Types[TUINT16]
|
|
|
|
|
case 4:
|
|
|
|
|
fn = "libfuzzerTraceCmp4"
|
|
|
|
|
if constcmp {
|
|
|
|
|
fn = "libfuzzerTraceConstCmp4"
|
|
|
|
|
}
|
|
|
|
|
paramType = types.Types[TUINT32]
|
|
|
|
|
case 8:
|
|
|
|
|
fn = "libfuzzerTraceCmp8"
|
|
|
|
|
if constcmp {
|
|
|
|
|
fn = "libfuzzerTraceConstCmp8"
|
|
|
|
|
}
|
|
|
|
|
paramType = types.Types[TUINT64]
|
|
|
|
|
default:
|
|
|
|
|
Fatalf("unexpected integer size %d for %v", t.Size(), t)
|
|
|
|
|
}
|
|
|
|
|
init.Append(mkcall(fn, nil, init, tracecmpArg(l, paramType, init), tracecmpArg(r, paramType, init)))
|
|
|
|
|
}
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
case TARRAY:
|
2017-04-14 13:53:40 -05:00
|
|
|
// We can compare several elements at once with 2/4/8 byte integer compares
|
|
|
|
|
inline = t.NumElem() <= 1 || (issimple[t.Elem().Etype] && (t.NumElem() <= 4 || t.Elem().Width*t.NumElem() <= maxcmpsize))
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
case TSTRUCT:
|
2018-03-30 12:25:02 -07:00
|
|
|
inline = t.NumComponents(types.IgnoreBlankFields) <= 4
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-03-02 20:34:22 -05:00
|
|
|
cmpl := n.Left
|
2015-02-13 14:40:36 -05:00
|
|
|
for cmpl != nil && cmpl.Op == OCONVNOP {
|
|
|
|
|
cmpl = cmpl.Left
|
|
|
|
|
}
|
2015-03-02 20:34:22 -05:00
|
|
|
cmpr := n.Right
|
2015-02-13 14:40:36 -05:00
|
|
|
for cmpr != nil && cmpr.Op == OCONVNOP {
|
|
|
|
|
cmpr = cmpr.Left
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
// Chose not to inline. Call equality function directly.
|
|
|
|
|
if !inline {
|
2019-09-26 10:59:11 -07:00
|
|
|
// eq algs take pointers; cmpl and cmpr must be addressable
|
2017-01-14 21:40:16 -08:00
|
|
|
if !islvalue(cmpl) || !islvalue(cmpr) {
|
|
|
|
|
Fatalf("arguments of comparison must be lvalues - %v %v", cmpl, cmpr)
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-24 17:15:30 +01:00
|
|
|
fn, needsize := eqfor(t)
|
|
|
|
|
call := nod(OCALL, fn, nil)
|
2019-09-26 11:00:13 -07:00
|
|
|
call.List.Append(nod(OADDR, cmpl, nil))
|
|
|
|
|
call.List.Append(nod(OADDR, cmpr, nil))
|
2017-10-24 17:15:30 +01:00
|
|
|
if needsize {
|
2016-09-15 14:34:20 +10:00
|
|
|
call.List.Append(nodintconst(t.Width))
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
}
|
|
|
|
|
res := call
|
|
|
|
|
if n.Op != OEQ {
|
2016-09-16 11:00:54 +10:00
|
|
|
res = nod(ONOT, res, nil)
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
}
|
|
|
|
|
n = finishcompare(n, res, init)
|
|
|
|
|
return n
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
// inline: build boolean expression comparing element by element
|
|
|
|
|
andor := OANDAND
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Op == ONE {
|
|
|
|
|
andor = OOROR
|
|
|
|
|
}
|
2015-03-02 20:34:22 -05:00
|
|
|
var expr *Node
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
compare := func(el, er *Node) {
|
2016-09-16 11:00:54 +10:00
|
|
|
a := nod(n.Op, el, er)
|
2015-02-13 14:40:36 -05:00
|
|
|
if expr == nil {
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
expr = a
|
|
|
|
|
} else {
|
2016-09-16 11:00:54 +10:00
|
|
|
expr = nod(andor, expr, a)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
cmpl = safeexpr(cmpl, init)
|
|
|
|
|
cmpr = safeexpr(cmpr, init)
|
|
|
|
|
if t.IsStruct() {
|
|
|
|
|
for _, f := range t.Fields().Slice() {
|
|
|
|
|
sym := f.Sym
|
2017-04-21 07:51:41 -07:00
|
|
|
if sym.IsBlank() {
|
2015-02-13 14:40:36 -05:00
|
|
|
continue
|
|
|
|
|
}
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
compare(
|
2016-09-15 15:45:10 +10:00
|
|
|
nodSym(OXDOT, cmpl, sym),
|
|
|
|
|
nodSym(OXDOT, cmpr, sym),
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
} else {
|
2017-04-14 13:53:40 -05:00
|
|
|
step := int64(1)
|
|
|
|
|
remains := t.NumElem() * t.Elem().Width
|
|
|
|
|
combine64bit := unalignedLoad && Widthreg == 8 && t.Elem().Width <= 4 && t.Elem().IsInteger()
|
|
|
|
|
combine32bit := unalignedLoad && t.Elem().Width <= 2 && t.Elem().IsInteger()
|
|
|
|
|
combine16bit := unalignedLoad && t.Elem().Width == 1 && t.Elem().IsInteger()
|
|
|
|
|
for i := int64(0); remains > 0; {
|
|
|
|
|
var convType *types.Type
|
|
|
|
|
switch {
|
|
|
|
|
case remains >= 8 && combine64bit:
|
|
|
|
|
convType = types.Types[TINT64]
|
|
|
|
|
step = 8 / t.Elem().Width
|
|
|
|
|
case remains >= 4 && combine32bit:
|
|
|
|
|
convType = types.Types[TUINT32]
|
|
|
|
|
step = 4 / t.Elem().Width
|
|
|
|
|
case remains >= 2 && combine16bit:
|
|
|
|
|
convType = types.Types[TUINT16]
|
|
|
|
|
step = 2 / t.Elem().Width
|
|
|
|
|
default:
|
|
|
|
|
step = 1
|
|
|
|
|
}
|
|
|
|
|
if step == 1 {
|
|
|
|
|
compare(
|
2018-02-25 20:08:22 +09:00
|
|
|
nod(OINDEX, cmpl, nodintconst(i)),
|
|
|
|
|
nod(OINDEX, cmpr, nodintconst(i)),
|
2017-04-14 13:53:40 -05:00
|
|
|
)
|
|
|
|
|
i++
|
|
|
|
|
remains -= t.Elem().Width
|
|
|
|
|
} else {
|
2018-02-06 09:44:34 -08:00
|
|
|
elemType := t.Elem().ToUnsigned()
|
2018-02-25 20:08:22 +09:00
|
|
|
cmplw := nod(OINDEX, cmpl, nodintconst(i))
|
2018-02-06 09:44:34 -08:00
|
|
|
cmplw = conv(cmplw, elemType) // convert to unsigned
|
|
|
|
|
cmplw = conv(cmplw, convType) // widen
|
2018-02-25 20:08:22 +09:00
|
|
|
cmprw := nod(OINDEX, cmpr, nodintconst(i))
|
2018-02-06 09:44:34 -08:00
|
|
|
cmprw = conv(cmprw, elemType)
|
2017-04-14 13:53:40 -05:00
|
|
|
cmprw = conv(cmprw, convType)
|
|
|
|
|
// For code like this: uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 ...
|
|
|
|
|
// ssa will generate a single large load.
|
|
|
|
|
for offset := int64(1); offset < step; offset++ {
|
2018-02-25 20:08:22 +09:00
|
|
|
lb := nod(OINDEX, cmpl, nodintconst(i+offset))
|
2018-02-06 09:44:34 -08:00
|
|
|
lb = conv(lb, elemType)
|
2017-04-14 13:53:40 -05:00
|
|
|
lb = conv(lb, convType)
|
2018-02-25 20:08:22 +09:00
|
|
|
lb = nod(OLSH, lb, nodintconst(8*t.Elem().Width*offset))
|
2017-04-14 13:53:40 -05:00
|
|
|
cmplw = nod(OOR, cmplw, lb)
|
2018-02-25 20:08:22 +09:00
|
|
|
rb := nod(OINDEX, cmpr, nodintconst(i+offset))
|
2018-02-06 09:44:34 -08:00
|
|
|
rb = conv(rb, elemType)
|
2017-04-14 13:53:40 -05:00
|
|
|
rb = conv(rb, convType)
|
2018-02-25 20:08:22 +09:00
|
|
|
rb = nod(OLSH, rb, nodintconst(8*t.Elem().Width*offset))
|
2017-04-14 13:53:40 -05:00
|
|
|
cmprw = nod(OOR, cmprw, rb)
|
|
|
|
|
}
|
|
|
|
|
compare(cmplw, cmprw)
|
|
|
|
|
i += step
|
|
|
|
|
remains -= step * t.Elem().Width
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
if expr == nil {
|
2016-09-15 15:45:10 +10:00
|
|
|
expr = nodbool(n.Op == OEQ)
|
2018-10-29 17:02:42 -07:00
|
|
|
// We still need to use cmpl and cmpr, in case they contain
|
|
|
|
|
// an expression which might panic. See issue 23837.
|
|
|
|
|
t := temp(cmpl.Type)
|
|
|
|
|
a1 := nod(OAS, t, cmpl)
|
2018-11-18 08:34:38 -08:00
|
|
|
a1 = typecheck(a1, ctxStmt)
|
2018-10-29 17:02:42 -07:00
|
|
|
a2 := nod(OAS, t, cmpr)
|
2018-11-18 08:34:38 -08:00
|
|
|
a2 = typecheck(a2, ctxStmt)
|
2018-10-29 17:02:42 -07:00
|
|
|
init.Append(a1, a2)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: when inlining ==, don’t take the address of the values
This CL reworks walkcompare for clarity and concision.
It also makes one significant functional change.
(The functional change is hard to separate cleanly
from the cleanup, so I just did them together.)
When inlining and unrolling an equality comparison
for a small struct or array, compare the elements like:
a[0] == b[0] && a[1] == b[1]
rather than
pa := &a
pb := &b
pa[0] == pb[0] && pa[1] == pb[1]
The result is the same, but taking the address
and working through the indirect
forces the backends to generate less efficient code.
This is only an improvement with the SSA backend.
However, every port but s390x now has a working
SSA backend, and switching to the SSA backend
by default everywhere is a priority for Go 1.8.
It thus seems reasonable to start to prioritize
SSA performance over the old backend.
Updates #15303
Sample code:
type T struct {
a, b int8
}
func g(a T) bool {
return a == T{1, 2}
}
SSA before:
"".g t=1 size=80 args=0x10 locals=0x8
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $8-16
0x0000 00000 (badeq.go:7) SUBQ $8, SP
0x0004 00004 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0004 00004 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0004 00004 (badeq.go:8) MOVBLZX "".a+16(FP), AX
0x0009 00009 (badeq.go:8) MOVB AL, "".autotmp_0+6(SP)
0x000d 00013 (badeq.go:8) MOVBLZX "".a+17(FP), AX
0x0012 00018 (badeq.go:8) MOVB AL, "".autotmp_0+7(SP)
0x0016 00022 (badeq.go:8) MOVB $0, "".autotmp_1+4(SP)
0x001b 00027 (badeq.go:8) MOVB $1, "".autotmp_1+4(SP)
0x0020 00032 (badeq.go:8) MOVB $2, "".autotmp_1+5(SP)
0x0025 00037 (badeq.go:8) MOVBLZX "".autotmp_0+6(SP), AX
0x002a 00042 (badeq.go:8) MOVBLZX "".autotmp_1+4(SP), CX
0x002f 00047 (badeq.go:8) CMPB AL, CL
0x0031 00049 (badeq.go:8) JNE 70
0x0033 00051 (badeq.go:8) MOVBLZX "".autotmp_0+7(SP), AX
0x0038 00056 (badeq.go:8) CMPB AL, $2
0x003a 00058 (badeq.go:8) SETEQ AL
0x003d 00061 (badeq.go:8) MOVB AL, "".~r1+24(FP)
0x0041 00065 (badeq.go:8) ADDQ $8, SP
0x0045 00069 (badeq.go:8) RET
0x0046 00070 (badeq.go:8) MOVB $0, AL
0x0048 00072 (badeq.go:8) JMP 61
SSA after:
"".g t=1 size=32 args=0x10 locals=0x0
0x0000 00000 (badeq.go:7) TEXT "".g(SB), $0-16
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) NOP
0x0000 00000 (badeq.go:7) FUNCDATA $0, gclocals·23e8278e2b69a3a75fa59b23c49ed6ad(SB)
0x0000 00000 (badeq.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (badeq.go:8) MOVBLZX "".a+8(FP), AX
0x0005 00005 (badeq.go:8) CMPB AL, $1
0x0007 00007 (badeq.go:8) JNE 25
0x0009 00009 (badeq.go:8) MOVBLZX "".a+9(FP), CX
0x000e 00014 (badeq.go:8) CMPB CL, $2
0x0011 00017 (badeq.go:8) SETEQ AL
0x0014 00020 (badeq.go:8) MOVB AL, "".~r1+16(FP)
0x0018 00024 (badeq.go:8) RET
0x0019 00025 (badeq.go:8) MOVB $0, AL
0x001b 00027 (badeq.go:8) JMP 20
Change-Id: I120185d58012b7bbcdb1ec01225b5b08d0855d86
Reviewed-on: https://go-review.googlesource.com/22277
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2016-04-15 15:49:00 -07:00
|
|
|
n = finishcompare(n, expr, init)
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
return n
|
2015-03-02 20:34:22 -05:00
|
|
|
}
|
|
|
|
|
|
2019-10-28 15:30:35 -07:00
|
|
|
func tracecmpArg(n *Node, t *types.Type, init *Nodes) *Node {
|
|
|
|
|
// Ugly hack to avoid "constant -1 overflows uintptr" errors, etc.
|
2020-10-12 15:02:59 +02:00
|
|
|
if n.Op == OLITERAL && n.Type.IsSigned() && n.Int64Val() < 0 {
|
2019-10-28 15:30:35 -07:00
|
|
|
n = copyexpr(n, n.Type, init)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return conv(n, t)
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-10 16:47:47 -07:00
|
|
|
func walkcompareInterface(n *Node, init *Nodes) *Node {
|
|
|
|
|
n.Right = cheapexpr(n.Right, init)
|
|
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2020-04-24 11:00:44 -07:00
|
|
|
eqtab, eqdata := eqinterface(n.Left, n.Right)
|
2018-10-10 16:47:47 -07:00
|
|
|
var cmp *Node
|
|
|
|
|
if n.Op == OEQ {
|
2020-04-24 11:00:44 -07:00
|
|
|
cmp = nod(OANDAND, eqtab, eqdata)
|
2018-10-10 16:47:47 -07:00
|
|
|
} else {
|
2020-04-24 11:00:44 -07:00
|
|
|
eqtab.Op = ONE
|
|
|
|
|
cmp = nod(OOROR, eqtab, nod(ONOT, eqdata, nil))
|
2018-10-10 16:47:47 -07:00
|
|
|
}
|
|
|
|
|
return finishcompare(n, cmp, init)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func walkcompareString(n *Node, init *Nodes) *Node {
|
|
|
|
|
// Rewrite comparisons to short constant strings as length+byte-wise comparisons.
|
|
|
|
|
var cs, ncs *Node // const string, non-const string
|
|
|
|
|
switch {
|
2020-11-23 13:42:43 -08:00
|
|
|
case Isconst(n.Left, constant.String) && Isconst(n.Right, constant.String):
|
2018-10-10 16:47:47 -07:00
|
|
|
// ignore; will be constant evaluated
|
2020-11-23 13:42:43 -08:00
|
|
|
case Isconst(n.Left, constant.String):
|
2018-10-10 16:47:47 -07:00
|
|
|
cs = n.Left
|
|
|
|
|
ncs = n.Right
|
2020-11-23 13:42:43 -08:00
|
|
|
case Isconst(n.Right, constant.String):
|
2018-10-10 16:47:47 -07:00
|
|
|
cs = n.Right
|
|
|
|
|
ncs = n.Left
|
|
|
|
|
}
|
|
|
|
|
if cs != nil {
|
|
|
|
|
cmp := n.Op
|
|
|
|
|
// Our comparison below assumes that the non-constant string
|
|
|
|
|
// is on the left hand side, so rewrite "" cmp x to x cmp "".
|
|
|
|
|
// See issue 24817.
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(n.Left, constant.String) {
|
2018-10-10 16:47:47 -07:00
|
|
|
cmp = brrev(cmp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// maxRewriteLen was chosen empirically.
|
|
|
|
|
// It is the value that minimizes cmd/go file size
|
|
|
|
|
// across most architectures.
|
|
|
|
|
// See the commit description for CL 26758 for details.
|
|
|
|
|
maxRewriteLen := 6
|
|
|
|
|
// Some architectures can load unaligned byte sequence as 1 word.
|
|
|
|
|
// So we can cover longer strings with the same amount of code.
|
|
|
|
|
canCombineLoads := canMergeLoads()
|
|
|
|
|
combine64bit := false
|
|
|
|
|
if canCombineLoads {
|
|
|
|
|
// Keep this low enough to generate less code than a function call.
|
|
|
|
|
maxRewriteLen = 2 * thearch.LinkArch.RegSize
|
|
|
|
|
combine64bit = thearch.LinkArch.RegSize >= 8
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var and Op
|
|
|
|
|
switch cmp {
|
|
|
|
|
case OEQ:
|
|
|
|
|
and = OANDAND
|
|
|
|
|
case ONE:
|
|
|
|
|
and = OOROR
|
|
|
|
|
default:
|
|
|
|
|
// Don't do byte-wise comparisons for <, <=, etc.
|
|
|
|
|
// They're fairly complicated.
|
|
|
|
|
// Length-only checks are ok, though.
|
|
|
|
|
maxRewriteLen = 0
|
|
|
|
|
}
|
2020-10-12 15:02:59 +02:00
|
|
|
if s := cs.StringVal(); len(s) <= maxRewriteLen {
|
2018-10-10 16:47:47 -07:00
|
|
|
if len(s) > 0 {
|
|
|
|
|
ncs = safeexpr(ncs, init)
|
|
|
|
|
}
|
|
|
|
|
r := nod(cmp, nod(OLEN, ncs, nil), nodintconst(int64(len(s))))
|
|
|
|
|
remains := len(s)
|
|
|
|
|
for i := 0; remains > 0; {
|
|
|
|
|
if remains == 1 || !canCombineLoads {
|
|
|
|
|
cb := nodintconst(int64(s[i]))
|
|
|
|
|
ncb := nod(OINDEX, ncs, nodintconst(int64(i)))
|
|
|
|
|
r = nod(and, r, nod(cmp, ncb, cb))
|
|
|
|
|
remains--
|
|
|
|
|
i++
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
var step int
|
|
|
|
|
var convType *types.Type
|
|
|
|
|
switch {
|
|
|
|
|
case remains >= 8 && combine64bit:
|
|
|
|
|
convType = types.Types[TINT64]
|
|
|
|
|
step = 8
|
|
|
|
|
case remains >= 4:
|
|
|
|
|
convType = types.Types[TUINT32]
|
|
|
|
|
step = 4
|
|
|
|
|
case remains >= 2:
|
|
|
|
|
convType = types.Types[TUINT16]
|
|
|
|
|
step = 2
|
|
|
|
|
}
|
|
|
|
|
ncsubstr := nod(OINDEX, ncs, nodintconst(int64(i)))
|
|
|
|
|
ncsubstr = conv(ncsubstr, convType)
|
|
|
|
|
csubstr := int64(s[i])
|
|
|
|
|
// Calculate large constant from bytes as sequence of shifts and ors.
|
|
|
|
|
// Like this: uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 ...
|
|
|
|
|
// ssa will combine this into a single large load.
|
|
|
|
|
for offset := 1; offset < step; offset++ {
|
|
|
|
|
b := nod(OINDEX, ncs, nodintconst(int64(i+offset)))
|
|
|
|
|
b = conv(b, convType)
|
|
|
|
|
b = nod(OLSH, b, nodintconst(int64(8*offset)))
|
|
|
|
|
ncsubstr = nod(OOR, ncsubstr, b)
|
|
|
|
|
csubstr |= int64(s[i+offset]) << uint8(8*offset)
|
|
|
|
|
}
|
|
|
|
|
csubstrPart := nodintconst(csubstr)
|
|
|
|
|
// Compare "step" bytes as once
|
|
|
|
|
r = nod(and, r, nod(cmp, csubstrPart, ncsubstr))
|
|
|
|
|
remains -= step
|
|
|
|
|
i += step
|
|
|
|
|
}
|
|
|
|
|
return finishcompare(n, r, init)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var r *Node
|
|
|
|
|
if n.Op == OEQ || n.Op == ONE {
|
|
|
|
|
// prepare for rewrite below
|
|
|
|
|
n.Left = cheapexpr(n.Left, init)
|
|
|
|
|
n.Right = cheapexpr(n.Right, init)
|
2020-04-24 09:43:49 -07:00
|
|
|
eqlen, eqmem := eqstring(n.Left, n.Right)
|
2018-10-10 16:47:47 -07:00
|
|
|
// quick check of len before full compare for == or !=.
|
|
|
|
|
// memequal then tests equality up to length len.
|
|
|
|
|
if n.Op == OEQ {
|
|
|
|
|
// len(left) == len(right) && memequal(left, right, len)
|
2020-04-24 09:43:49 -07:00
|
|
|
r = nod(OANDAND, eqlen, eqmem)
|
2018-10-10 16:47:47 -07:00
|
|
|
} else {
|
|
|
|
|
// len(left) != len(right) || !memequal(left, right, len)
|
2020-04-24 09:43:49 -07:00
|
|
|
eqlen.Op = ONE
|
|
|
|
|
r = nod(OOROR, eqlen, nod(ONOT, eqmem, nil))
|
2018-10-10 16:47:47 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// sys_cmpstring(s1, s2) :: 0
|
|
|
|
|
r = mkcall("cmpstring", types.Types[TINT], init, conv(n.Left, types.Types[TSTRING]), conv(n.Right, types.Types[TSTRING]))
|
|
|
|
|
r = nod(n.Op, r, nodintconst(0))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return finishcompare(n, r, init)
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-23 16:43:17 -07:00
|
|
|
// The result of finishcompare MUST be assigned back to n, e.g.
|
cmd/compile: reduce use of **Node parameters
Escape analysis has a hard time with tree-like
structures (see #13493 and #14858).
This is unlikely to change.
As a result, when invoking a function that accepts
a **Node parameter, we usually allocate a *Node
on the heap. This happens a whole lot.
This CL changes functions from taking a **Node
to acting more like append: It both modifies
the input and returns a replacement for it.
Because of the cascading nature of escape analysis,
in order to get the benefits, I had to modify
almost all such functions. The remaining functions
are in racewalk and the backend. I would be happy
to update them as well in a separate CL.
This CL was created by manually updating the
function signatures and the directly impacted
bits of code. The callsites were then automatically
updated using a bespoke script:
https://gist.github.com/josharian/046b1be7aceae244de39
For ease of reviewing and future understanding,
this CL is also broken down into four CLs,
mailed separately, which show the manual
and the automated changes separately.
They are CLs 20990, 20991, 20992, and 20993.
Passes toolstash -cmp.
name old time/op new time/op delta
Template 335ms ± 5% 324ms ± 5% -3.35% (p=0.000 n=23+24)
Unicode 176ms ± 9% 165ms ± 6% -6.12% (p=0.000 n=23+24)
GoTypes 1.10s ± 4% 1.07s ± 2% -2.77% (p=0.000 n=24+24)
Compiler 5.31s ± 3% 5.15s ± 3% -2.95% (p=0.000 n=24+24)
MakeBash 41.6s ± 1% 41.7s ± 2% ~ (p=0.586 n=23+23)
name old alloc/op new alloc/op delta
Template 63.3MB ± 0% 62.4MB ± 0% -1.36% (p=0.000 n=25+23)
Unicode 42.4MB ± 0% 41.6MB ± 0% -1.99% (p=0.000 n=24+25)
GoTypes 220MB ± 0% 217MB ± 0% -1.11% (p=0.000 n=25+25)
Compiler 994MB ± 0% 973MB ± 0% -2.08% (p=0.000 n=24+25)
name old allocs/op new allocs/op delta
Template 681k ± 0% 574k ± 0% -15.71% (p=0.000 n=24+25)
Unicode 518k ± 0% 413k ± 0% -20.34% (p=0.000 n=25+24)
GoTypes 2.08M ± 0% 1.78M ± 0% -14.62% (p=0.000 n=25+25)
Compiler 9.26M ± 0% 7.64M ± 0% -17.48% (p=0.000 n=25+25)
name old text-bytes new text-bytes delta
HelloSize 578k ± 0% 578k ± 0% ~ (all samples are equal)
CmdGoSize 6.46M ± 0% 6.46M ± 0% ~ (all samples are equal)
name old data-bytes new data-bytes delta
HelloSize 128k ± 0% 128k ± 0% ~ (all samples are equal)
CmdGoSize 281k ± 0% 281k ± 0% ~ (all samples are equal)
name old exe-bytes new exe-bytes delta
HelloSize 921k ± 0% 921k ± 0% ~ (all samples are equal)
CmdGoSize 9.86M ± 0% 9.86M ± 0% ~ (all samples are equal)
Change-Id: I277d95bd56d51c166ef7f560647aeaa092f3f475
Reviewed-on: https://go-review.googlesource.com/20959
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2016-03-20 08:03:31 -07:00
|
|
|
// n.Left = finishcompare(n.Left, x, r, init)
|
2016-03-23 16:43:17 -07:00
|
|
|
func finishcompare(n, r *Node, init *Nodes) *Node {
|
2018-11-18 08:34:38 -08:00
|
|
|
r = typecheck(r, ctxExpr)
|
2018-03-01 09:26:38 -05:00
|
|
|
r = conv(r, n.Type)
|
2018-02-25 18:14:20 +09:00
|
|
|
r = walkexpr(r, init)
|
|
|
|
|
return r
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// return 1 if integer n must be in range [0, max), 0 otherwise
|
2015-02-17 22:13:49 -05:00
|
|
|
func bounded(n *Node, max int64) bool {
|
2016-03-30 15:09:25 -07:00
|
|
|
if n.Type == nil || !n.Type.IsInteger() {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-30 15:09:25 -07:00
|
|
|
sign := n.Type.IsSigned()
|
2015-02-23 16:07:24 -05:00
|
|
|
bits := int32(8 * n.Type.Width)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2016-09-15 14:34:20 +10:00
|
|
|
if smallintconst(n) {
|
2020-10-12 15:02:59 +02:00
|
|
|
v := n.Int64Val()
|
2015-02-17 22:13:49 -05:00
|
|
|
return 0 <= v && v < max
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
cmd/compile: defer lowering OANDNOT until SSA
Currently, "x &^ y" gets rewriten into "x & ^y" during walk. It adds
unnecessary complexity to other parts, which must aware about this.
Instead, we can just implement "&^" in the conversion to SSA, so "&^"
can be handled like other binary operators.
However, this CL does not pass toolstash-check. It seems that implements
"&^" in the conversion to SSA causes registers allocation change.
With the parent:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ "".n(SP), CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ DX, CX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, CX
With this CL:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ "".n(SP), DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ CX, DX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, DX
Change-Id: I80acf8496a91be4804fb7ef3df04c19baae2754c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264660
Trust: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Run-TryBot: Cuong Manh Le <cuong.manhle.vn@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-10-24 10:40:09 +07:00
|
|
|
case OAND, OANDNOT:
|
2015-02-23 16:07:24 -05:00
|
|
|
v := int64(-1)
|
cmd/compile: defer lowering OANDNOT until SSA
Currently, "x &^ y" gets rewriten into "x & ^y" during walk. It adds
unnecessary complexity to other parts, which must aware about this.
Instead, we can just implement "&^" in the conversion to SSA, so "&^"
can be handled like other binary operators.
However, this CL does not pass toolstash-check. It seems that implements
"&^" in the conversion to SSA causes registers allocation change.
With the parent:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ "".n(SP), CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ DX, CX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, CX
With this CL:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ "".n(SP), DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ CX, DX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, DX
Change-Id: I80acf8496a91be4804fb7ef3df04c19baae2754c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264660
Trust: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Run-TryBot: Cuong Manh Le <cuong.manhle.vn@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-10-24 10:40:09 +07:00
|
|
|
switch {
|
|
|
|
|
case smallintconst(n.Left):
|
2020-10-12 15:02:59 +02:00
|
|
|
v = n.Left.Int64Val()
|
cmd/compile: defer lowering OANDNOT until SSA
Currently, "x &^ y" gets rewriten into "x & ^y" during walk. It adds
unnecessary complexity to other parts, which must aware about this.
Instead, we can just implement "&^" in the conversion to SSA, so "&^"
can be handled like other binary operators.
However, this CL does not pass toolstash-check. It seems that implements
"&^" in the conversion to SSA causes registers allocation change.
With the parent:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ "".n(SP), CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ DX, CX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, CX
With this CL:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ "".n(SP), DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ CX, DX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, DX
Change-Id: I80acf8496a91be4804fb7ef3df04c19baae2754c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264660
Trust: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Run-TryBot: Cuong Manh Le <cuong.manhle.vn@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-10-24 10:40:09 +07:00
|
|
|
case smallintconst(n.Right):
|
2020-10-12 15:02:59 +02:00
|
|
|
v = n.Right.Int64Val()
|
cmd/compile: defer lowering OANDNOT until SSA
Currently, "x &^ y" gets rewriten into "x & ^y" during walk. It adds
unnecessary complexity to other parts, which must aware about this.
Instead, we can just implement "&^" in the conversion to SSA, so "&^"
can be handled like other binary operators.
However, this CL does not pass toolstash-check. It seems that implements
"&^" in the conversion to SSA causes registers allocation change.
With the parent:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ "".n(SP), CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ DX, CX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, CX
With this CL:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ "".n(SP), DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ CX, DX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, DX
Change-Id: I80acf8496a91be4804fb7ef3df04c19baae2754c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264660
Trust: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Run-TryBot: Cuong Manh Le <cuong.manhle.vn@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-10-24 10:40:09 +07:00
|
|
|
if n.Op == OANDNOT {
|
|
|
|
|
v = ^v
|
|
|
|
|
if !sign {
|
|
|
|
|
v &= 1<<uint(bits) - 1
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if 0 <= v && v < max {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OMOD:
|
2016-09-15 14:34:20 +10:00
|
|
|
if !sign && smallintconst(n.Right) {
|
2020-10-12 15:02:59 +02:00
|
|
|
v := n.Right.Int64Val()
|
2015-02-13 14:40:36 -05:00
|
|
|
if 0 <= v && v <= max {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ODIV:
|
2016-09-15 14:34:20 +10:00
|
|
|
if !sign && smallintconst(n.Right) {
|
2020-10-12 15:02:59 +02:00
|
|
|
v := n.Right.Int64Val()
|
2015-02-13 14:40:36 -05:00
|
|
|
for bits > 0 && v >= 2 {
|
|
|
|
|
bits--
|
|
|
|
|
v >>= 1
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ORSH:
|
2016-09-15 14:34:20 +10:00
|
|
|
if !sign && smallintconst(n.Right) {
|
2020-10-12 15:02:59 +02:00
|
|
|
v := n.Right.Int64Val()
|
2015-02-13 14:40:36 -05:00
|
|
|
if v > int64(bits) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
bits -= int32(v)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-01 07:54:01 +00:00
|
|
|
if !sign && bits <= 62 && 1<<uint(bits) <= max {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2017-06-14 14:03:46 -07:00
|
|
|
// usemethod checks interface method calls for uses of reflect.Type.Method.
|
2016-03-10 16:15:26 -05:00
|
|
|
func usemethod(n *Node) {
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
|
|
|
|
|
// Looking for either of:
|
|
|
|
|
// Method(int) reflect.Method
|
|
|
|
|
// MethodByName(string) (reflect.Method, bool)
|
|
|
|
|
//
|
|
|
|
|
// TODO(crawshaw): improve precision of match by working out
|
|
|
|
|
// how to check the method name.
|
2017-05-02 09:16:22 -07:00
|
|
|
if n := t.NumParams(); n != 1 {
|
2016-03-10 16:15:26 -05:00
|
|
|
return
|
|
|
|
|
}
|
2017-05-02 09:16:22 -07:00
|
|
|
if n := t.NumResults(); n != 1 && n != 2 {
|
2016-03-10 16:15:26 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
p0 := t.Params().Field(0)
|
|
|
|
|
res0 := t.Results().Field(0)
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
var res1 *types.Field
|
2017-05-02 09:16:22 -07:00
|
|
|
if t.NumResults() == 2 {
|
2016-03-10 16:15:26 -05:00
|
|
|
res1 = t.Results().Field(1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if res1 == nil {
|
|
|
|
|
if p0.Type.Etype != TINT {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2016-03-30 14:56:08 -07:00
|
|
|
if !p0.Type.IsString() {
|
2016-03-10 16:15:26 -05:00
|
|
|
return
|
|
|
|
|
}
|
2016-03-30 14:56:08 -07:00
|
|
|
if !res1.Type.IsBoolean() {
|
2016-03-10 16:15:26 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-14 14:03:46 -07:00
|
|
|
// Note: Don't rely on res0.Type.String() since its formatting depends on multiple factors
|
|
|
|
|
// (including global variables such as numImports - was issue #19028).
|
2020-04-18 17:47:54 -04:00
|
|
|
// Also need to check for reflect package itself (see Issue #38515).
|
2020-04-19 22:18:15 +07:00
|
|
|
if s := res0.Type.Sym; s != nil && s.Name == "Method" && isReflectPkg(s.Pkg) {
|
2017-06-14 14:03:46 -07:00
|
|
|
Curfn.Func.SetReflectMethod(true)
|
2020-09-20 23:29:20 -04:00
|
|
|
// The LSym is initialized at this point. We need to set the attribute on the LSym.
|
|
|
|
|
Curfn.Func.lsym.Set(obj.AttrReflectMethod, true)
|
2017-06-14 14:03:46 -07:00
|
|
|
}
|
2016-03-10 16:15:26 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
func usefield(n *Node) {
|
2017-04-18 12:53:25 -07:00
|
|
|
if objabi.Fieldtrack_enabled == 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("usefield %v", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case ODOT, ODOTPTR:
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: change ODOT and friends to use Sym, not Right
The Node type ODOT and its variants all represent a selector, with a
simple name to the right of the dot. Before this change this was
represented by using an ONAME Node in the Right field. This ONAME node
served no useful purpose. This CL changes these Node types to store the
symbol in the Sym field instead, thus not requiring allocating a Node
for each selector.
When compiling x/tools/go/types this CL eliminates nearly 5000 calls to
newname and reduces the total number of Nodes allocated by about 6.6%.
It seems to cut compilation time by 1 to 2 percent.
Getting this right was somewhat subtle, and I added two dubious changes
to produce the exact same output as before. One is to ishairy in
inl.go: the ONAME node increased the cost of ODOT and friends by 1, and
I retained that, although really ODOT is not more expensive than any
other node. The other is to varexpr in walk.go: because the ONAME in
the Right field of an ODOT has no class, varexpr would always return
false for an ODOT, although in fact for some ODOT's it seemingly ought
to return true; I added an && false for now. I will send separate CLs,
that will break toolstash -cmp, to clean these up.
This CL passes toolstash -cmp.
Change-Id: I4af8a10cc59078c436130ce472f25abc3a9b2f80
Reviewed-on: https://go-review.googlesource.com/20890
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-03-18 16:52:30 -07:00
|
|
|
if n.Sym == nil {
|
2016-03-02 20:54:41 -08:00
|
|
|
// No field name. This DOTPTR was built by the compiler for access
|
|
|
|
|
// to runtime data structures. Ignore.
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-05-26 21:49:31 -04:00
|
|
|
t := n.Left.Type
|
2016-03-30 15:09:25 -07:00
|
|
|
if t.IsPtr() {
|
2016-03-30 10:57:47 -07:00
|
|
|
t = t.Elem()
|
2015-05-26 21:49:31 -04:00
|
|
|
}
|
cmd/compile: fix panic in field tracking logic
Within the frontend, we generally don't guarantee uniqueness of
anonymous types. For example, each struct type literal gets
represented by its own types.Type instance.
However, the field tracking code was using the struct type as a map
key. This broke in golang.org/cl/256457, because that CL started
changing the inlined parameter variables from using the types.Type of
the declared parameter to that of the call site argument. These are
always identical types (e.g., types.Identical would report true), but
they can be different pointer values, causing the map lookup to fail.
The easiest fix is to simply get rid of the map and instead use
Node.Opt for tracking the types.Field. To mitigate against more latent
field tracking failures (e.g., if any other code were to start trying
to use Opt on ODOT/ODOTPTR fields), we store this field
unconditionally. I also expect having the types.Field will be useful
to other frontend code in the future.
Finally, to make it easier to test field tracking without having to
run make.bash with GOEXPERIMENT=fieldtrack, this commit adds a
-d=fieldtrack flag as an alternative way to enable field tracking
within the compiler. See also #42681.
Fixes #42686.
Change-Id: I6923d206d5e2cab1e6798cba36cae96c1eeaea55
Reviewed-on: https://go-review.googlesource.com/c/go/+/271217
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Matthew Dempsky <mdempsky@google.com>
2020-11-18 12:08:59 -08:00
|
|
|
field := n.Opt().(*types.Field)
|
2015-02-13 14:40:36 -05:00
|
|
|
if field == nil {
|
cmd/compile: change ODOT and friends to use Sym, not Right
The Node type ODOT and its variants all represent a selector, with a
simple name to the right of the dot. Before this change this was
represented by using an ONAME Node in the Right field. This ONAME node
served no useful purpose. This CL changes these Node types to store the
symbol in the Sym field instead, thus not requiring allocating a Node
for each selector.
When compiling x/tools/go/types this CL eliminates nearly 5000 calls to
newname and reduces the total number of Nodes allocated by about 6.6%.
It seems to cut compilation time by 1 to 2 percent.
Getting this right was somewhat subtle, and I added two dubious changes
to produce the exact same output as before. One is to ishairy in
inl.go: the ONAME node increased the cost of ODOT and friends by 1, and
I retained that, although really ODOT is not more expensive than any
other node. The other is to varexpr in walk.go: because the ONAME in
the Right field of an ODOT has no class, varexpr would always return
false for an ODOT, although in fact for some ODOT's it seemingly ought
to return true; I added an && false for now. I will send separate CLs,
that will break toolstash -cmp, to clean these up.
This CL passes toolstash -cmp.
Change-Id: I4af8a10cc59078c436130ce472f25abc3a9b2f80
Reviewed-on: https://go-review.googlesource.com/20890
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-03-18 16:52:30 -07:00
|
|
|
Fatalf("usefield %v %v without paramfld", n.Left.Type, n.Sym)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: fix panic in field tracking logic
Within the frontend, we generally don't guarantee uniqueness of
anonymous types. For example, each struct type literal gets
represented by its own types.Type instance.
However, the field tracking code was using the struct type as a map
key. This broke in golang.org/cl/256457, because that CL started
changing the inlined parameter variables from using the types.Type of
the declared parameter to that of the call site argument. These are
always identical types (e.g., types.Identical would report true), but
they can be different pointer values, causing the map lookup to fail.
The easiest fix is to simply get rid of the map and instead use
Node.Opt for tracking the types.Field. To mitigate against more latent
field tracking failures (e.g., if any other code were to start trying
to use Opt on ODOT/ODOTPTR fields), we store this field
unconditionally. I also expect having the types.Field will be useful
to other frontend code in the future.
Finally, to make it easier to test field tracking without having to
run make.bash with GOEXPERIMENT=fieldtrack, this commit adds a
-d=fieldtrack flag as an alternative way to enable field tracking
within the compiler. See also #42681.
Fixes #42686.
Change-Id: I6923d206d5e2cab1e6798cba36cae96c1eeaea55
Reviewed-on: https://go-review.googlesource.com/c/go/+/271217
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Matthew Dempsky <mdempsky@google.com>
2020-11-18 12:08:59 -08:00
|
|
|
if field.Sym != n.Sym || field.Offset != n.Xoffset {
|
|
|
|
|
Fatalf("field inconsistency: %v,%v != %v,%v", field.Sym, field.Offset, n.Sym, n.Xoffset)
|
|
|
|
|
}
|
2016-04-25 13:24:48 -07:00
|
|
|
if !strings.Contains(field.Note, "go:\"track\"") {
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-10 16:15:44 -08:00
|
|
|
outer := n.Left.Type
|
2016-03-30 15:09:25 -07:00
|
|
|
if outer.IsPtr() {
|
2016-03-30 10:57:47 -07:00
|
|
|
outer = outer.Elem()
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2016-03-10 16:15:44 -08:00
|
|
|
if outer.Sym == nil {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("tracked field must be in named struct type")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
2018-04-09 15:22:01 -07:00
|
|
|
if !types.IsExported(field.Sym.Name) {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("tracked field must be exported (upper case)")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-10 16:15:44 -08:00
|
|
|
sym := tracksym(outer, field)
|
|
|
|
|
if Curfn.Func.FieldTrack == nil {
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
Curfn.Func.FieldTrack = make(map[*types.Sym]struct{})
|
2016-03-10 16:15:44 -08:00
|
|
|
}
|
|
|
|
|
Curfn.Func.FieldTrack[sym] = struct{}{}
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-08 10:26:20 -08:00
|
|
|
func candiscardlist(l Nodes) bool {
|
2016-03-08 15:10:26 -08:00
|
|
|
for _, n := range l.Slice() {
|
|
|
|
|
if !candiscard(n) {
|
2016-02-27 14:31:33 -08:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func candiscard(n *Node) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n == nil {
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Discardable as long as the subpieces are.
|
|
|
|
|
case ONAME,
|
|
|
|
|
ONONAME,
|
|
|
|
|
OTYPE,
|
|
|
|
|
OPACK,
|
|
|
|
|
OLITERAL,
|
2020-11-13 20:38:21 -08:00
|
|
|
ONIL,
|
2015-02-13 14:40:36 -05:00
|
|
|
OADD,
|
|
|
|
|
OSUB,
|
|
|
|
|
OOR,
|
|
|
|
|
OXOR,
|
|
|
|
|
OADDSTR,
|
|
|
|
|
OADDR,
|
|
|
|
|
OANDAND,
|
2018-11-18 08:34:38 -08:00
|
|
|
OBYTES2STR,
|
|
|
|
|
ORUNES2STR,
|
|
|
|
|
OSTR2BYTES,
|
|
|
|
|
OSTR2RUNES,
|
2015-02-13 14:40:36 -05:00
|
|
|
OCAP,
|
|
|
|
|
OCOMPLIT,
|
|
|
|
|
OMAPLIT,
|
|
|
|
|
OSTRUCTLIT,
|
|
|
|
|
OARRAYLIT,
|
2016-06-19 07:20:28 -07:00
|
|
|
OSLICELIT,
|
2015-02-13 14:40:36 -05:00
|
|
|
OPTRLIT,
|
|
|
|
|
OCONV,
|
|
|
|
|
OCONVIFACE,
|
|
|
|
|
OCONVNOP,
|
|
|
|
|
ODOT,
|
|
|
|
|
OEQ,
|
|
|
|
|
ONE,
|
|
|
|
|
OLT,
|
|
|
|
|
OLE,
|
|
|
|
|
OGT,
|
|
|
|
|
OGE,
|
|
|
|
|
OKEY,
|
2016-10-12 15:48:18 -07:00
|
|
|
OSTRUCTKEY,
|
2015-02-13 14:40:36 -05:00
|
|
|
OLEN,
|
|
|
|
|
OMUL,
|
|
|
|
|
OLSH,
|
|
|
|
|
ORSH,
|
|
|
|
|
OAND,
|
|
|
|
|
OANDNOT,
|
|
|
|
|
ONEW,
|
|
|
|
|
ONOT,
|
2018-11-18 08:34:38 -08:00
|
|
|
OBITNOT,
|
2015-02-13 14:40:36 -05:00
|
|
|
OPLUS,
|
2018-11-18 08:34:38 -08:00
|
|
|
ONEG,
|
2015-02-13 14:40:36 -05:00
|
|
|
OOROR,
|
|
|
|
|
OPAREN,
|
|
|
|
|
ORUNESTR,
|
|
|
|
|
OREAL,
|
|
|
|
|
OIMAG,
|
|
|
|
|
OCOMPLEX:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
// Discardable as long as we know it's not division by zero.
|
2015-04-01 09:38:44 -07:00
|
|
|
case ODIV, OMOD:
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(n.Right, constant.Int) && n.Right.Val().U.(*Mpint).CmpInt64(0) != 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(n.Right, constant.Float) && n.Right.Val().U.(*Mpflt).CmpFloat64(0) != 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Discardable as long as we know it won't fail because of a bad size.
|
2015-04-01 09:38:44 -07:00
|
|
|
case OMAKECHAN, OMAKEMAP:
|
2020-11-23 13:42:43 -08:00
|
|
|
if Isconst(n.Left, constant.Int) && n.Left.Val().U.(*Mpint).CmpInt64(0) == 0 {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// Difficult to tell what sizes are okay.
|
|
|
|
|
case OMAKESLICE:
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2018-10-23 13:50:07 +02:00
|
|
|
|
|
|
|
|
case OMAKESLICECOPY:
|
|
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2016-03-03 20:48:00 -08:00
|
|
|
if !candiscard(n.Left) || !candiscard(n.Right) || !candiscardlist(n.Ninit) || !candiscardlist(n.Nbody) || !candiscardlist(n.List) || !candiscardlist(n.Rlist) {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
// Rewrite
|
|
|
|
|
// go builtin(x, y, z)
|
2015-02-13 14:40:36 -05:00
|
|
|
// into
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
// go func(a1, a2, a3) {
|
|
|
|
|
// builtin(a1, a2, a3)
|
2015-02-13 14:40:36 -05:00
|
|
|
// }(x, y, z)
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
// for print, println, and delete.
|
2020-09-08 15:28:43 +07:00
|
|
|
//
|
|
|
|
|
// Rewrite
|
|
|
|
|
// go f(x, y, uintptr(unsafe.Pointer(z)))
|
|
|
|
|
// into
|
|
|
|
|
// go func(a1, a2, a3) {
|
|
|
|
|
// builtin(a1, a2, uintptr(a3))
|
|
|
|
|
// }(x, y, unsafe.Pointer(z))
|
|
|
|
|
// for function contains unsafe-uintptr arguments.
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
var wrapCall_prgen int
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
// The result of wrapCall MUST be assigned back to n, e.g.
|
|
|
|
|
// n.Left = wrapCall(n.Left, init)
|
|
|
|
|
func wrapCall(n *Node, init *Nodes) *Node {
|
2016-03-08 15:10:26 -08:00
|
|
|
if n.Ninit.Len() != 0 {
|
2016-03-07 22:54:46 -08:00
|
|
|
walkstmtlist(n.Ninit.Slice())
|
|
|
|
|
init.AppendNodes(&n.Ninit)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2020-09-08 15:28:43 +07:00
|
|
|
isBuiltinCall := n.Op != OCALLFUNC && n.Op != OCALLMETH && n.Op != OCALLINTER
|
2020-10-17 01:10:06 -07:00
|
|
|
|
|
|
|
|
// Turn f(a, b, []T{c, d, e}...) back into f(a, b, c, d, e).
|
|
|
|
|
if !isBuiltinCall && n.IsDDD() {
|
|
|
|
|
last := n.List.Len() - 1
|
|
|
|
|
if va := n.List.Index(last); va.Op == OSLICELIT {
|
|
|
|
|
n.List.Set(append(n.List.Slice()[:last], va.List.Slice()...))
|
|
|
|
|
n.SetIsDDD(false)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-08 15:28:43 +07:00
|
|
|
// origArgs keeps track of what argument is uintptr-unsafe/unsafe-uintptr conversion.
|
|
|
|
|
origArgs := make([]*Node, n.List.Len())
|
2016-09-16 11:00:54 +10:00
|
|
|
t := nod(OTFUNC, nil, nil)
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
for i, arg := range n.List.Slice() {
|
2018-04-18 23:22:26 -07:00
|
|
|
s := lookupN("a", i)
|
2020-09-09 12:09:26 +07:00
|
|
|
if !isBuiltinCall && arg.Op == OCONVNOP && arg.Type.IsUintptr() && arg.Left.Type.IsUnsafePtr() {
|
2020-09-08 15:28:43 +07:00
|
|
|
origArgs[i] = arg
|
|
|
|
|
arg = arg.Left
|
|
|
|
|
n.List.SetIndex(i, arg)
|
|
|
|
|
}
|
2018-04-18 23:22:26 -07:00
|
|
|
t.List.Append(symfield(s, arg.Type))
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: fix miscompilation of "defer delete(m, k)"
Previously, for slow map key types (i.e., any type other than a 32-bit
or 64-bit plain memory type), we would rewrite
defer delete(m, k)
into
ktmp := k
defer delete(m, &ktmp)
However, if the defer statement was inside a loop, we would end up
reusing the same ktmp value for all of the deferred deletes.
We already rewrite
defer print(x, y, z)
into
defer func(a1, a2, a3) {
print(a1, a2, a3)
}(x, y, z)
This CL generalizes this rewrite to also apply for slow map deletes.
This could be extended to apply even more generally to other builtins,
but as discussed on #24259, there are cases where we must *not* do
this (e.g., "defer recover()"). However, if we elect to do this more
generally, this CL should still make that easier.
Lastly, while here, fix a few isues in wrapCall (nee walkprintfunc):
1) lookupN appends the generation number to the symbol anyway, so "%d"
was being literally included in the generated function names.
2) walkstmt will be called when the function is compiled later anyway,
so no need to do it now.
Fixes #24259.
Change-Id: I70286867c64c69c18e9552f69e3f4154a0fc8b04
Reviewed-on: https://go-review.googlesource.com/99017
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-03-06 14:36:49 -08:00
|
|
|
wrapCall_prgen++
|
|
|
|
|
sym := lookupN("wrap·", wrapCall_prgen)
|
2017-04-10 13:03:14 -07:00
|
|
|
fn := dclfunc(sym, t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-09-08 15:28:43 +07:00
|
|
|
args := paramNnames(t.Type)
|
|
|
|
|
for i, origArg := range origArgs {
|
|
|
|
|
if origArg == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
arg := nod(origArg.Op, args[i], nil)
|
|
|
|
|
arg.Type = origArg.Type
|
|
|
|
|
args[i] = arg
|
|
|
|
|
}
|
|
|
|
|
call := nod(n.Op, nil, nil)
|
|
|
|
|
if !isBuiltinCall {
|
|
|
|
|
call.Op = OCALL
|
|
|
|
|
call.Left = n.Left
|
2020-09-17 21:38:42 -07:00
|
|
|
call.SetIsDDD(n.IsDDD())
|
2020-09-08 15:28:43 +07:00
|
|
|
}
|
|
|
|
|
call.List.Set(args)
|
|
|
|
|
fn.Nbody.Set1(call)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2017-08-09 16:13:09 +09:00
|
|
|
funcbody()
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2018-11-18 08:34:38 -08:00
|
|
|
fn = typecheck(fn, ctxStmt)
|
|
|
|
|
typecheckslice(fn.Nbody.Slice(), ctxStmt)
|
2016-03-09 20:29:21 -08:00
|
|
|
xtop = append(xtop, fn)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2020-09-08 15:28:43 +07:00
|
|
|
call = nod(OCALL, nil, nil)
|
|
|
|
|
call.Left = fn.Func.Nname
|
|
|
|
|
call.List.Set(n.List.Slice())
|
|
|
|
|
call = typecheck(call, ctxStmt)
|
|
|
|
|
call = walkexpr(call, init)
|
|
|
|
|
return call
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
|
|
|
|
|
// substArgTypes substitutes the given list of types for
|
|
|
|
|
// successive occurrences of the "any" placeholder in the
|
|
|
|
|
// type syntax expression n.Type.
|
|
|
|
|
// The result of substArgTypes MUST be assigned back to old, e.g.
|
|
|
|
|
// n.Left = substArgTypes(n.Left, t1, t2)
|
|
|
|
|
func substArgTypes(old *Node, types_ ...*types.Type) *Node {
|
2018-09-20 15:22:33 -07:00
|
|
|
n := old.copy()
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
|
|
|
|
|
for _, t := range types_ {
|
|
|
|
|
dowidth(t)
|
|
|
|
|
}
|
|
|
|
|
n.Type = types.SubstAny(n.Type, &types_)
|
|
|
|
|
if len(types_) > 0 {
|
|
|
|
|
Fatalf("substArgTypes: too many argument types")
|
|
|
|
|
}
|
2017-10-23 19:57:07 +01:00
|
|
|
return n
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
}
|
2018-03-26 21:18:27 +01:00
|
|
|
|
|
|
|
|
// canMergeLoads reports whether the backend optimization passes for
|
|
|
|
|
// the current architecture can combine adjacent loads into a single
|
|
|
|
|
// larger, possibly unaligned, load. Note that currently the
|
|
|
|
|
// optimizations must be able to handle little endian byte order.
|
|
|
|
|
func canMergeLoads() bool {
|
|
|
|
|
switch thearch.LinkArch.Family {
|
|
|
|
|
case sys.ARM64, sys.AMD64, sys.I386, sys.S390X:
|
|
|
|
|
return true
|
|
|
|
|
case sys.PPC64:
|
|
|
|
|
// Load combining only supported on ppc64le.
|
|
|
|
|
return thearch.LinkArch.ByteOrder == binary.LittleEndian
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2018-04-24 15:13:08 +02:00
|
|
|
|
|
|
|
|
// isRuneCount reports whether n is of the form len([]rune(string)).
|
2018-06-01 19:43:58 +02:00
|
|
|
// These are optimized into a call to runtime.countrunes.
|
2018-04-24 15:13:08 +02:00
|
|
|
func isRuneCount(n *Node) bool {
|
2020-10-19 11:31:10 +02:00
|
|
|
return Debug.N == 0 && !instrumenting && n.Op == OLEN && n.Left.Op == OSTR2RUNES
|
2018-04-24 15:13:08 +02:00
|
|
|
}
|
2019-02-12 19:40:42 -08:00
|
|
|
|
2019-10-17 14:29:16 -07:00
|
|
|
func walkCheckPtrAlignment(n *Node, init *Nodes, count *Node) *Node {
|
|
|
|
|
if !n.Type.IsPtr() {
|
|
|
|
|
Fatalf("expected pointer type: %v", n.Type)
|
|
|
|
|
}
|
|
|
|
|
elem := n.Type.Elem()
|
|
|
|
|
if count != nil {
|
|
|
|
|
if !elem.IsArray() {
|
|
|
|
|
Fatalf("expected array type: %v", elem)
|
|
|
|
|
}
|
|
|
|
|
elem = elem.Elem()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size := elem.Size()
|
|
|
|
|
if elem.Alignment() == 1 && (size == 0 || size == 1 && count == nil) {
|
2019-02-12 19:40:42 -08:00
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-17 14:29:16 -07:00
|
|
|
if count == nil {
|
|
|
|
|
count = nodintconst(1)
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-12 19:40:42 -08:00
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2019-10-17 14:29:16 -07:00
|
|
|
init.Append(mkcall("checkptrAlignment", nil, init, convnop(n.Left, types.Types[TUNSAFEPTR]), typename(elem), conv(count, types.Types[TUINTPTR])))
|
2019-02-12 19:40:42 -08:00
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var walkCheckPtrArithmeticMarker byte
|
|
|
|
|
|
|
|
|
|
func walkCheckPtrArithmetic(n *Node, init *Nodes) *Node {
|
|
|
|
|
// Calling cheapexpr(n, init) below leads to a recursive call
|
|
|
|
|
// to walkexpr, which leads us back here again. Use n.Opt to
|
|
|
|
|
// prevent infinite loops.
|
2019-10-21 23:25:32 +07:00
|
|
|
if opt := n.Opt(); opt == &walkCheckPtrArithmeticMarker {
|
2019-02-12 19:40:42 -08:00
|
|
|
return n
|
2019-10-21 23:25:32 +07:00
|
|
|
} else if opt != nil {
|
|
|
|
|
// We use n.Opt() here because today it's not used for OCONVNOP. If that changes,
|
|
|
|
|
// there's no guarantee that temporarily replacing it is safe, so just hard fail here.
|
|
|
|
|
Fatalf("unexpected Opt: %v", opt)
|
2019-02-12 19:40:42 -08:00
|
|
|
}
|
|
|
|
|
n.SetOpt(&walkCheckPtrArithmeticMarker)
|
|
|
|
|
defer n.SetOpt(nil)
|
|
|
|
|
|
|
|
|
|
// TODO(mdempsky): Make stricter. We only need to exempt
|
|
|
|
|
// reflect.Value.Pointer and reflect.Value.UnsafeAddr.
|
|
|
|
|
switch n.Left.Op {
|
|
|
|
|
case OCALLFUNC, OCALLMETH, OCALLINTER:
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-21 12:44:42 -07:00
|
|
|
if n.Left.Op == ODOTPTR && isReflectHeaderDataField(n.Left) {
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-12 19:40:42 -08:00
|
|
|
// Find original unsafe.Pointer operands involved in this
|
|
|
|
|
// arithmetic expression.
|
|
|
|
|
//
|
|
|
|
|
// "It is valid both to add and to subtract offsets from a
|
|
|
|
|
// pointer in this way. It is also valid to use &^ to round
|
|
|
|
|
// pointers, usually for alignment."
|
|
|
|
|
var originals []*Node
|
|
|
|
|
var walk func(n *Node)
|
|
|
|
|
walk = func(n *Node) {
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OADD:
|
|
|
|
|
walk(n.Left)
|
|
|
|
|
walk(n.Right)
|
cmd/compile: defer lowering OANDNOT until SSA
Currently, "x &^ y" gets rewriten into "x & ^y" during walk. It adds
unnecessary complexity to other parts, which must aware about this.
Instead, we can just implement "&^" in the conversion to SSA, so "&^"
can be handled like other binary operators.
However, this CL does not pass toolstash-check. It seems that implements
"&^" in the conversion to SSA causes registers allocation change.
With the parent:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ "".n(SP), CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ DX, CX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, CX
With this CL:
obj: 00212 (.../src/runtime/complex.go:47) MOVQ X0, AX
obj: 00213 (.../src/runtime/complex.go:47) BTRQ $63, AX
obj: 00214 (.../src/runtime/complex.go:47) MOVQ $-9223372036854775808, CX
obj: 00215 (.../src/runtime/complex.go:47) MOVQ "".n(SP), DX
obj: 00216 (.../src/runtime/complex.go:47) ANDQ CX, DX
obj: 00217 (.../src/runtime/complex.go:47) ORQ AX, DX
Change-Id: I80acf8496a91be4804fb7ef3df04c19baae2754c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264660
Trust: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Run-TryBot: Cuong Manh Le <cuong.manhle.vn@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2020-10-24 10:40:09 +07:00
|
|
|
case OSUB, OANDNOT:
|
2019-02-12 19:40:42 -08:00
|
|
|
walk(n.Left)
|
|
|
|
|
case OCONVNOP:
|
2020-09-09 12:06:18 +07:00
|
|
|
if n.Left.Type.IsUnsafePtr() {
|
2019-02-12 19:40:42 -08:00
|
|
|
n.Left = cheapexpr(n.Left, init)
|
2019-10-17 13:16:15 -07:00
|
|
|
originals = append(originals, convnop(n.Left, types.Types[TUNSAFEPTR]))
|
2019-02-12 19:40:42 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
walk(n.Left)
|
|
|
|
|
|
|
|
|
|
n = cheapexpr(n, init)
|
|
|
|
|
|
2020-04-21 15:37:29 -07:00
|
|
|
slice := mkdotargslice(types.NewSlice(types.Types[TUNSAFEPTR]), originals)
|
|
|
|
|
slice.Esc = EscNone
|
2019-02-12 19:40:42 -08:00
|
|
|
|
2019-10-17 13:16:15 -07:00
|
|
|
init.Append(mkcall("checkptrArithmetic", nil, init, convnop(n, types.Types[TUNSAFEPTR]), slice))
|
2020-01-14 09:50:43 -08:00
|
|
|
// TODO(khr): Mark backing store of slice as dead. This will allow us to reuse
|
|
|
|
|
// the backing store for multiple calls to checkptrArithmetic.
|
|
|
|
|
|
2019-02-12 19:40:42 -08:00
|
|
|
return n
|
|
|
|
|
}
|
2019-10-17 12:31:07 -07:00
|
|
|
|
|
|
|
|
// checkPtr reports whether pointer checking should be enabled for
|
2019-10-17 16:31:19 -07:00
|
|
|
// function fn at a given level. See debugHelpFooter for defined
|
|
|
|
|
// levels.
|
|
|
|
|
func checkPtr(fn *Node, level int) bool {
|
|
|
|
|
return Debug_checkptr >= level && fn.Func.Pragma&NoCheckPtr == 0
|
2019-10-17 12:31:07 -07:00
|
|
|
}
|