2016-03-11 14:28:16 -08:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
//go:generate go run mkbuiltin.go
|
|
|
|
|
|
|
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
2016-06-24 15:03:04 -07:00
|
|
|
"bytes"
|
cmd/compile: add framework for logging optimizer (non)actions to LSP
This is intended to allow IDEs to note where the optimizer
was not able to improve users' code. There may be other
applications for this, for example in studying effectiveness
of optimizer changes more quickly than running benchmarks,
or in verifying that code changes did not accidentally disable
optimizations in performance-critical code.
Logging of nilcheck (bad) for amd64 is implemented as
proof-of-concept. In general, the intent is that optimizations
that didn't happen are what will be logged, because that is
believed to be what IDE users want.
Added flag -json=version,dest
Check that version=0. (Future compilers will support a
few recent versions, I hope that version is always <=3.)
Dest is expected to be one of:
/path (or \path in Windows)
will create directory /path and fill it w/ json files
file://path
will create directory path, intended either for
I:\dont\know\enough\about\windows\paths
trustme_I_know_what_I_am_doing_probably_testing
Not passing an absolute path name usually leads to
json splattered all over source directories,
or failure when those directories are not writeable.
If you want a foot-gun, you have to ask for it.
The JSON output is directed to subdirectories of dest,
where each subdirectory is net/url.PathEscape of the
package name, and each for each foo.go in the package,
net/url.PathEscape(foo).json is created. The first line
of foo.json contains version and context information,
and subsequent lines contains LSP-conforming JSON
describing the missing optimizations.
Change-Id: Ib83176a53a8c177ee9081aefc5ae05604ccad8a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/204338
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2019-10-24 13:48:17 -04:00
|
|
|
"cmd/compile/internal/logopt"
|
2016-03-11 14:28:16 -08:00
|
|
|
"cmd/compile/internal/ssa"
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
"cmd/compile/internal/types"
|
2018-04-17 14:59:13 -07:00
|
|
|
"cmd/internal/bio"
|
2017-10-17 17:09:54 -04:00
|
|
|
"cmd/internal/dwarf"
|
2020-08-02 19:36:28 -04:00
|
|
|
"cmd/internal/goobj"
|
2016-03-11 14:28:16 -08:00
|
|
|
"cmd/internal/obj"
|
2017-04-18 12:53:25 -07:00
|
|
|
"cmd/internal/objabi"
|
2016-12-06 17:08:06 -08:00
|
|
|
"cmd/internal/src"
|
2016-04-06 12:01:40 -07:00
|
|
|
"cmd/internal/sys"
|
2016-03-11 14:28:16 -08:00
|
|
|
"flag"
|
|
|
|
|
"fmt"
|
2019-04-19 16:09:17 +00:00
|
|
|
"internal/goversion"
|
2016-03-11 14:28:16 -08:00
|
|
|
"io"
|
2017-05-31 11:19:54 -04:00
|
|
|
"io/ioutil"
|
2016-03-11 14:28:16 -08:00
|
|
|
"log"
|
|
|
|
|
"os"
|
|
|
|
|
"path"
|
2018-10-24 15:49:32 -07:00
|
|
|
"regexp"
|
2016-03-21 14:37:57 +11:00
|
|
|
"runtime"
|
2019-04-30 15:23:14 -04:00
|
|
|
"sort"
|
2016-03-11 14:28:16 -08:00
|
|
|
"strconv"
|
|
|
|
|
"strings"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var (
|
2020-01-06 10:31:39 -05:00
|
|
|
buildid string
|
|
|
|
|
spectre string
|
|
|
|
|
spectreIndex bool
|
2016-03-11 14:28:16 -08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var (
|
cmd/compile: don't update outer variables after capturevars is complete
When compiling concurrently, we walk all functions before compiling
any of them. Walking functions can cause variables to switch from
being non-addrtaken to addrtaken, e.g. to prepare for a runtime call.
Typechecking propagates addrtaken-ness of closure variables to
their outer variables, so that capturevars can decide whether to
pass the variable's value or a pointer to it.
When all functions are compiled immediately, as long as the containing
function is compiled prior to the closure, this propagation has no effect.
When compilation is deferred, though, in rare cases, this results in
a change in the addrtaken-ness of a variable in the outer function,
which in turn changes the compiler's output.
(This is rare because in a great many cases, a temporary has been
introduced, insulating the outer variable from modification.)
But concurrent compilation must generate identical results.
To fix this, track whether capturevars has run.
If it has, there is no need to update outer variables
when closure variables change.
Capturevars always runs before any functions are walked or compiled.
The remainder of the changes in this CL are to support the test.
In particular, -d=compilelater forces the compiler to walk all
functions before compiling any of them, despite being non-concurrent.
This is useful because -live is fundamentally incompatible with
concurrent compilation, but we want -c=1 to have no behavior changes.
Fixes #20250
Change-Id: I89bcb54268a41e8588af1ac8cc37fbef856a90c2
Reviewed-on: https://go-review.googlesource.com/42853
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2017-05-05 15:22:59 -07:00
|
|
|
Debug_append int
|
2019-02-12 19:40:42 -08:00
|
|
|
Debug_checkptr int
|
cmd/compile: don't update outer variables after capturevars is complete
When compiling concurrently, we walk all functions before compiling
any of them. Walking functions can cause variables to switch from
being non-addrtaken to addrtaken, e.g. to prepare for a runtime call.
Typechecking propagates addrtaken-ness of closure variables to
their outer variables, so that capturevars can decide whether to
pass the variable's value or a pointer to it.
When all functions are compiled immediately, as long as the containing
function is compiled prior to the closure, this propagation has no effect.
When compilation is deferred, though, in rare cases, this results in
a change in the addrtaken-ness of a variable in the outer function,
which in turn changes the compiler's output.
(This is rare because in a great many cases, a temporary has been
introduced, insulating the outer variable from modification.)
But concurrent compilation must generate identical results.
To fix this, track whether capturevars has run.
If it has, there is no need to update outer variables
when closure variables change.
Capturevars always runs before any functions are walked or compiled.
The remainder of the changes in this CL are to support the test.
In particular, -d=compilelater forces the compiler to walk all
functions before compiling any of them, despite being non-concurrent.
This is useful because -live is fundamentally incompatible with
concurrent compilation, but we want -c=1 to have no behavior changes.
Fixes #20250
Change-Id: I89bcb54268a41e8588af1ac8cc37fbef856a90c2
Reviewed-on: https://go-review.googlesource.com/42853
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2017-05-05 15:22:59 -07:00
|
|
|
Debug_closure int
|
|
|
|
|
Debug_compilelater int
|
|
|
|
|
debug_dclstack int
|
2020-11-18 16:42:31 -08:00
|
|
|
Debug_dumpptrs int
|
2019-10-18 17:39:39 -07:00
|
|
|
Debug_libfuzzer int
|
cmd/compile: don't update outer variables after capturevars is complete
When compiling concurrently, we walk all functions before compiling
any of them. Walking functions can cause variables to switch from
being non-addrtaken to addrtaken, e.g. to prepare for a runtime call.
Typechecking propagates addrtaken-ness of closure variables to
their outer variables, so that capturevars can decide whether to
pass the variable's value or a pointer to it.
When all functions are compiled immediately, as long as the containing
function is compiled prior to the closure, this propagation has no effect.
When compilation is deferred, though, in rare cases, this results in
a change in the addrtaken-ness of a variable in the outer function,
which in turn changes the compiler's output.
(This is rare because in a great many cases, a temporary has been
introduced, insulating the outer variable from modification.)
But concurrent compilation must generate identical results.
To fix this, track whether capturevars has run.
If it has, there is no need to update outer variables
when closure variables change.
Capturevars always runs before any functions are walked or compiled.
The remainder of the changes in this CL are to support the test.
In particular, -d=compilelater forces the compiler to walk all
functions before compiling any of them, despite being non-concurrent.
This is useful because -live is fundamentally incompatible with
concurrent compilation, but we want -c=1 to have no behavior changes.
Fixes #20250
Change-Id: I89bcb54268a41e8588af1ac8cc37fbef856a90c2
Reviewed-on: https://go-review.googlesource.com/42853
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2017-05-05 15:22:59 -07:00
|
|
|
Debug_panic int
|
|
|
|
|
Debug_slice int
|
|
|
|
|
Debug_vlog bool
|
|
|
|
|
Debug_wb int
|
|
|
|
|
Debug_pctab string
|
[dev.debug] cmd/compile: better DWARF with optimizations on
Debuggers use DWARF information to find local variables on the
stack and in registers. Prior to this CL, the DWARF information for
functions claimed that all variables were on the stack at all times.
That's incorrect when optimizations are enabled, and results in
debuggers showing data that is out of date or complete gibberish.
After this CL, the compiler is capable of representing variable
locations more accurately, and attempts to do so. Due to limitations of
the SSA backend, it's not possible to be completely correct.
There are a number of problems in the current design. One of the easier
to understand is that variable names currently must be attached to an
SSA value, but not all assignments in the source code actually result
in machine code. For example:
type myint int
var a int
b := myint(int)
and
b := (*uint64)(unsafe.Pointer(a))
don't generate machine code because the underlying representation is the
same, so the correct value of b will not be set when the user would
expect.
Generating the more precise debug information is behind a flag,
dwarflocationlists. Because of the issues described above, setting the
flag may not make the debugging experience much better, and may actually
make it worse in cases where the variable actually is on the stack and
the more complicated analysis doesn't realize it.
A number of changes are included:
- Add a new pseudo-instruction, RegKill, which indicates that the value
in the register has been clobbered.
- Adjust regalloc to emit RegKills in the right places. Significantly,
this means that phis are mixed with StoreReg and RegKills after
regalloc.
- Track variable decomposition in ssa.LocalSlots.
- After the SSA backend is done, analyze the result and build location
lists for each LocalSlot.
- After assembly is done, update the location lists with the assembled
PC offsets, recompose variables, and build DWARF location lists. Emit the
list as a new linker symbol, one per function.
- In the linker, aggregate the location lists into a .debug_loc section.
TODO:
- currently disabled for non-X86/AMD64 because there are no data tables.
go build -toolexec 'toolstash -cmp' -a std succeeds.
With -dwarflocationlists false:
before: f02812195637909ff675782c0b46836a8ff01976
after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec
benchstat -geomean /tmp/220352263 /tmp/621364410
completed 15 of 15, estimated time remaining 0s (eta 3:52PM)
name old time/op new time/op delta
Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14)
Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15)
GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14)
Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15)
GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15)
Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13)
Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15)
XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15)
[Geo mean] 206ms 377ms +82.86%
name old user-time/op new user-time/op delta
Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15)
Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14)
GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15)
Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15)
GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15)
Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13)
Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15)
XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15)
[Geo mean] 317ms 583ms +83.72%
name old alloc/op new alloc/op delta
Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15)
Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15)
GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14)
Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15)
GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15)
Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15)
Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15)
XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15)
[Geo mean] 42.1MB 75.0MB +78.05%
name old allocs/op new allocs/op delta
Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15)
Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14)
GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14)
Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15)
GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15)
Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15)
Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15)
XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15)
[Geo mean] 439k 755k +72.01%
name old text-bytes new text-bytes delta
HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15)
name old data-bytes new data-bytes delta
HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal)
name old bss-bytes new bss-bytes delta
HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal)
name old exe-bytes new exe-bytes delta
HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal)
Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8
Reviewed-on: https://go-review.googlesource.com/41770
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2017-07-21 18:30:19 -04:00
|
|
|
Debug_locationlist int
|
2017-10-20 15:20:56 -07:00
|
|
|
Debug_typecheckinl int
|
2017-10-06 11:32:28 -04:00
|
|
|
Debug_gendwarfinl int
|
2017-11-10 18:08:48 +01:00
|
|
|
Debug_softfloat int
|
cmd/compile, cmd/link, runtime: make defers low-cost through inline code and extra funcdata
Generate inline code at defer time to save the args of defer calls to unique
(autotmp) stack slots, and generate inline code at exit time to check which defer
calls were made and make the associated function/method/interface calls. We
remember that a particular defer statement was reached by storing in the deferBits
variable (always stored on the stack). At exit time, we check the bits of the
deferBits variable to determine which defer function calls to make (in reverse
order). These low-cost defers are only used for functions where no defers
appear in loops. In addition, we don't do these low-cost defers if there are too
many defer statements or too many exits in a function (to limit code increase).
When a function uses open-coded defers, we produce extra
FUNCDATA_OpenCodedDeferInfo information that specifies the number of defers, and
for each defer, the stack slots where the closure and associated args have been
stored. The funcdata also includes the location of the deferBits variable.
Therefore, for panics, we can use this funcdata to determine exactly which defers
are active, and call the appropriate functions/methods/closures with the correct
arguments for each active defer.
In order to unwind the stack correctly after a recover(), we need to add an extra
code segment to functions with open-coded defers that simply calls deferreturn()
and returns. This segment is not reachable by the normal function, but is returned
to by the runtime during recovery. We set the liveness information of this
deferreturn() to be the same as the liveness at the first function call during the
last defer exit code (so all return values and all stack slots needed by the defer
calls will be live).
I needed to increase the stackguard constant from 880 to 896, because of a small
amount of new code in deferreturn().
The -N flag disables open-coded defers. '-d defer' prints out the kind of defer
being used at each defer statement (heap-allocated, stack-allocated, or
open-coded).
Cost of defer statement [ go test -run NONE -bench BenchmarkDefer$ runtime ]
With normal (stack-allocated) defers only: 35.4 ns/op
With open-coded defers: 5.6 ns/op
Cost of function call alone (remove defer keyword): 4.4 ns/op
Text size increase (including funcdata) for go binary without/with open-coded defers: 0.09%
The average size increase (including funcdata) for only the functions that use
open-coded defers is 1.1%.
The cost of a panic followed by a recover got noticeably slower, since panic
processing now requires a scan of the stack for open-coded defer frames. This scan
is required, even if no frames are using open-coded defers:
Cost of panic and recover [ go test -run NONE -bench BenchmarkPanicRecover runtime ]
Without open-coded defers: 62.0 ns/op
With open-coded defers: 255 ns/op
A CGO Go-to-C-to-Go benchmark got noticeably faster because of open-coded defers:
CGO Go-to-C-to-Go benchmark [cd misc/cgo/test; go test -run NONE -bench BenchmarkCGoCallback ]
Without open-coded defers: 443 ns/op
With open-coded defers: 347 ns/op
Updates #14939 (defer performance)
Updates #34481 (design doc)
Change-Id: I63b1a60d1ebf28126f55ee9fd7ecffe9cb23d1ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/202340
Reviewed-by: Austin Clements <austin@google.com>
2019-06-24 12:59:22 -07:00
|
|
|
Debug_defer int
|
2016-03-11 14:28:16 -08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Debug arguments.
|
|
|
|
|
// These can be specified with the -d flag, as in "-d nil"
|
2017-02-22 16:13:06 -05:00
|
|
|
// to set the debug_checknil variable.
|
|
|
|
|
// Multiple options can be comma-separated.
|
|
|
|
|
// Each option accepts an optional argument, as in "gcprog=2"
|
2016-03-11 14:28:16 -08:00
|
|
|
var debugtab = []struct {
|
|
|
|
|
name string
|
2017-04-19 19:24:27 +01:00
|
|
|
help string
|
2017-02-22 16:13:06 -05:00
|
|
|
val interface{} // must be *int or *string
|
2016-03-11 14:28:16 -08:00
|
|
|
}{
|
2017-04-19 19:24:27 +01:00
|
|
|
{"append", "print information about append compilation", &Debug_append},
|
2019-02-12 19:40:42 -08:00
|
|
|
{"checkptr", "instrument unsafe pointer conversions", &Debug_checkptr},
|
2017-04-19 19:24:27 +01:00
|
|
|
{"closure", "print information about closure compilation", &Debug_closure},
|
cmd/compile: don't update outer variables after capturevars is complete
When compiling concurrently, we walk all functions before compiling
any of them. Walking functions can cause variables to switch from
being non-addrtaken to addrtaken, e.g. to prepare for a runtime call.
Typechecking propagates addrtaken-ness of closure variables to
their outer variables, so that capturevars can decide whether to
pass the variable's value or a pointer to it.
When all functions are compiled immediately, as long as the containing
function is compiled prior to the closure, this propagation has no effect.
When compilation is deferred, though, in rare cases, this results in
a change in the addrtaken-ness of a variable in the outer function,
which in turn changes the compiler's output.
(This is rare because in a great many cases, a temporary has been
introduced, insulating the outer variable from modification.)
But concurrent compilation must generate identical results.
To fix this, track whether capturevars has run.
If it has, there is no need to update outer variables
when closure variables change.
Capturevars always runs before any functions are walked or compiled.
The remainder of the changes in this CL are to support the test.
In particular, -d=compilelater forces the compiler to walk all
functions before compiling any of them, despite being non-concurrent.
This is useful because -live is fundamentally incompatible with
concurrent compilation, but we want -c=1 to have no behavior changes.
Fixes #20250
Change-Id: I89bcb54268a41e8588af1ac8cc37fbef856a90c2
Reviewed-on: https://go-review.googlesource.com/42853
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2017-05-05 15:22:59 -07:00
|
|
|
{"compilelater", "compile functions as late as possible", &Debug_compilelater},
|
2017-04-19 19:24:27 +01:00
|
|
|
{"disablenil", "disable nil checks", &disable_checknil},
|
|
|
|
|
{"dclstack", "run internal dclstack check", &debug_dclstack},
|
2020-11-18 16:42:31 -08:00
|
|
|
{"dumpptrs", "show Node pointer values in Dump/dumplist output", &Debug_dumpptrs},
|
2017-04-19 19:24:27 +01:00
|
|
|
{"gcprog", "print dump of GC programs", &Debug_gcprog},
|
2019-10-18 17:39:39 -07:00
|
|
|
{"libfuzzer", "coverage instrumentation for libfuzzer", &Debug_libfuzzer},
|
2017-04-19 19:24:27 +01:00
|
|
|
{"nil", "print information about nil checks", &Debug_checknil},
|
|
|
|
|
{"panic", "do not hide any compiler panic", &Debug_panic},
|
|
|
|
|
{"slice", "print information about slice compilation", &Debug_slice},
|
|
|
|
|
{"typeassert", "print information about type assertion inlining", &Debug_typeassert},
|
|
|
|
|
{"wb", "print information about write barriers", &Debug_wb},
|
|
|
|
|
{"export", "print export data", &Debug_export},
|
|
|
|
|
{"pctab", "print named pc-value table", &Debug_pctab},
|
[dev.debug] cmd/compile: better DWARF with optimizations on
Debuggers use DWARF information to find local variables on the
stack and in registers. Prior to this CL, the DWARF information for
functions claimed that all variables were on the stack at all times.
That's incorrect when optimizations are enabled, and results in
debuggers showing data that is out of date or complete gibberish.
After this CL, the compiler is capable of representing variable
locations more accurately, and attempts to do so. Due to limitations of
the SSA backend, it's not possible to be completely correct.
There are a number of problems in the current design. One of the easier
to understand is that variable names currently must be attached to an
SSA value, but not all assignments in the source code actually result
in machine code. For example:
type myint int
var a int
b := myint(int)
and
b := (*uint64)(unsafe.Pointer(a))
don't generate machine code because the underlying representation is the
same, so the correct value of b will not be set when the user would
expect.
Generating the more precise debug information is behind a flag,
dwarflocationlists. Because of the issues described above, setting the
flag may not make the debugging experience much better, and may actually
make it worse in cases where the variable actually is on the stack and
the more complicated analysis doesn't realize it.
A number of changes are included:
- Add a new pseudo-instruction, RegKill, which indicates that the value
in the register has been clobbered.
- Adjust regalloc to emit RegKills in the right places. Significantly,
this means that phis are mixed with StoreReg and RegKills after
regalloc.
- Track variable decomposition in ssa.LocalSlots.
- After the SSA backend is done, analyze the result and build location
lists for each LocalSlot.
- After assembly is done, update the location lists with the assembled
PC offsets, recompose variables, and build DWARF location lists. Emit the
list as a new linker symbol, one per function.
- In the linker, aggregate the location lists into a .debug_loc section.
TODO:
- currently disabled for non-X86/AMD64 because there are no data tables.
go build -toolexec 'toolstash -cmp' -a std succeeds.
With -dwarflocationlists false:
before: f02812195637909ff675782c0b46836a8ff01976
after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec
benchstat -geomean /tmp/220352263 /tmp/621364410
completed 15 of 15, estimated time remaining 0s (eta 3:52PM)
name old time/op new time/op delta
Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14)
Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15)
GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14)
Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15)
GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15)
Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13)
Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15)
XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15)
[Geo mean] 206ms 377ms +82.86%
name old user-time/op new user-time/op delta
Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15)
Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14)
GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15)
Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15)
GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15)
Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13)
Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15)
XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15)
[Geo mean] 317ms 583ms +83.72%
name old alloc/op new alloc/op delta
Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15)
Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15)
GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14)
Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15)
GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15)
Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15)
Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15)
XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15)
[Geo mean] 42.1MB 75.0MB +78.05%
name old allocs/op new allocs/op delta
Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15)
Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14)
GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14)
Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15)
GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15)
Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15)
Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15)
XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15)
[Geo mean] 439k 755k +72.01%
name old text-bytes new text-bytes delta
HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15)
name old data-bytes new data-bytes delta
HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal)
name old bss-bytes new bss-bytes delta
HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal)
name old exe-bytes new exe-bytes delta
HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal)
Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8
Reviewed-on: https://go-review.googlesource.com/41770
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2017-07-21 18:30:19 -04:00
|
|
|
{"locationlists", "print information about DWARF location list creation", &Debug_locationlist},
|
2017-10-20 15:20:56 -07:00
|
|
|
{"typecheckinl", "eager typechecking of inline function bodies", &Debug_typecheckinl},
|
2017-10-06 11:32:28 -04:00
|
|
|
{"dwarfinl", "print information about DWARF inlined function creation", &Debug_gendwarfinl},
|
2017-11-10 18:08:48 +01:00
|
|
|
{"softfloat", "force compiler to emit soft-float code", &Debug_softfloat},
|
cmd/compile, cmd/link, runtime: make defers low-cost through inline code and extra funcdata
Generate inline code at defer time to save the args of defer calls to unique
(autotmp) stack slots, and generate inline code at exit time to check which defer
calls were made and make the associated function/method/interface calls. We
remember that a particular defer statement was reached by storing in the deferBits
variable (always stored on the stack). At exit time, we check the bits of the
deferBits variable to determine which defer function calls to make (in reverse
order). These low-cost defers are only used for functions where no defers
appear in loops. In addition, we don't do these low-cost defers if there are too
many defer statements or too many exits in a function (to limit code increase).
When a function uses open-coded defers, we produce extra
FUNCDATA_OpenCodedDeferInfo information that specifies the number of defers, and
for each defer, the stack slots where the closure and associated args have been
stored. The funcdata also includes the location of the deferBits variable.
Therefore, for panics, we can use this funcdata to determine exactly which defers
are active, and call the appropriate functions/methods/closures with the correct
arguments for each active defer.
In order to unwind the stack correctly after a recover(), we need to add an extra
code segment to functions with open-coded defers that simply calls deferreturn()
and returns. This segment is not reachable by the normal function, but is returned
to by the runtime during recovery. We set the liveness information of this
deferreturn() to be the same as the liveness at the first function call during the
last defer exit code (so all return values and all stack slots needed by the defer
calls will be live).
I needed to increase the stackguard constant from 880 to 896, because of a small
amount of new code in deferreturn().
The -N flag disables open-coded defers. '-d defer' prints out the kind of defer
being used at each defer statement (heap-allocated, stack-allocated, or
open-coded).
Cost of defer statement [ go test -run NONE -bench BenchmarkDefer$ runtime ]
With normal (stack-allocated) defers only: 35.4 ns/op
With open-coded defers: 5.6 ns/op
Cost of function call alone (remove defer keyword): 4.4 ns/op
Text size increase (including funcdata) for go binary without/with open-coded defers: 0.09%
The average size increase (including funcdata) for only the functions that use
open-coded defers is 1.1%.
The cost of a panic followed by a recover got noticeably slower, since panic
processing now requires a scan of the stack for open-coded defer frames. This scan
is required, even if no frames are using open-coded defers:
Cost of panic and recover [ go test -run NONE -bench BenchmarkPanicRecover runtime ]
Without open-coded defers: 62.0 ns/op
With open-coded defers: 255 ns/op
A CGO Go-to-C-to-Go benchmark got noticeably faster because of open-coded defers:
CGO Go-to-C-to-Go benchmark [cd misc/cgo/test; go test -run NONE -bench BenchmarkCGoCallback ]
Without open-coded defers: 443 ns/op
With open-coded defers: 347 ns/op
Updates #14939 (defer performance)
Updates #34481 (design doc)
Change-Id: I63b1a60d1ebf28126f55ee9fd7ecffe9cb23d1ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/202340
Reviewed-by: Austin Clements <austin@google.com>
2019-06-24 12:59:22 -07:00
|
|
|
{"defer", "print information about defer compilation", &Debug_defer},
|
cmd/compile: fix panic in field tracking logic
Within the frontend, we generally don't guarantee uniqueness of
anonymous types. For example, each struct type literal gets
represented by its own types.Type instance.
However, the field tracking code was using the struct type as a map
key. This broke in golang.org/cl/256457, because that CL started
changing the inlined parameter variables from using the types.Type of
the declared parameter to that of the call site argument. These are
always identical types (e.g., types.Identical would report true), but
they can be different pointer values, causing the map lookup to fail.
The easiest fix is to simply get rid of the map and instead use
Node.Opt for tracking the types.Field. To mitigate against more latent
field tracking failures (e.g., if any other code were to start trying
to use Opt on ODOT/ODOTPTR fields), we store this field
unconditionally. I also expect having the types.Field will be useful
to other frontend code in the future.
Finally, to make it easier to test field tracking without having to
run make.bash with GOEXPERIMENT=fieldtrack, this commit adds a
-d=fieldtrack flag as an alternative way to enable field tracking
within the compiler. See also #42681.
Fixes #42686.
Change-Id: I6923d206d5e2cab1e6798cba36cae96c1eeaea55
Reviewed-on: https://go-review.googlesource.com/c/go/+/271217
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Matthew Dempsky <mdempsky@google.com>
2020-11-18 12:08:59 -08:00
|
|
|
{"fieldtrack", "enable fieldtracking", &objabi.Fieldtrack_enabled},
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2017-04-19 19:24:27 +01:00
|
|
|
const debugHelpHeader = `usage: -d arg[,arg]* and arg is <key>[=<value>]
|
|
|
|
|
|
|
|
|
|
<key> is one of:
|
|
|
|
|
|
|
|
|
|
`
|
|
|
|
|
|
|
|
|
|
const debugHelpFooter = `
|
|
|
|
|
<value> is key-specific.
|
|
|
|
|
|
2019-10-17 16:31:19 -07:00
|
|
|
Key "checkptr" supports values:
|
|
|
|
|
"0": instrumentation disabled
|
|
|
|
|
"1": conversions involving unsafe.Pointer are instrumented
|
|
|
|
|
"2": conversions to unsafe.Pointer force heap allocation
|
|
|
|
|
|
2017-04-19 19:24:27 +01:00
|
|
|
Key "pctab" supports values:
|
|
|
|
|
"pctospadj", "pctofile", "pctoline", "pctoinline", "pctopcdata"
|
|
|
|
|
`
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
func usage() {
|
2017-12-24 16:50:28 +00:00
|
|
|
fmt.Fprintf(os.Stderr, "usage: compile [options] file.go...\n")
|
|
|
|
|
objabi.Flagprint(os.Stderr)
|
2016-03-11 14:28:16 -08:00
|
|
|
Exit(2)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func hidePanic() {
|
|
|
|
|
if Debug_panic == 0 && nsavederrors+nerrors > 0 {
|
|
|
|
|
// If we've already complained about things
|
|
|
|
|
// in the program, don't bother complaining
|
|
|
|
|
// about a panic too; let the user clean up
|
|
|
|
|
// the code and try again.
|
|
|
|
|
if err := recover(); err != nil {
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-07 15:31:49 -04:00
|
|
|
// supportsDynlink reports whether or not the code generator for the given
|
|
|
|
|
// architecture supports the -shared and -dynlink flags.
|
|
|
|
|
func supportsDynlink(arch *sys.Arch) bool {
|
2020-11-04 00:26:15 +11:00
|
|
|
return arch.InFamily(sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.PPC64, sys.RISCV64, sys.S390X)
|
2016-04-07 15:31:49 -04:00
|
|
|
}
|
|
|
|
|
|
2016-06-24 15:03:04 -07:00
|
|
|
// timing data for compiler phases
|
|
|
|
|
var timings Timings
|
|
|
|
|
var benchfile string
|
|
|
|
|
|
cmd/compile: improve coverage of nowritebarrierrec check
The current go:nowritebarrierrec checker has two problems that limit
its coverage:
1. It doesn't understand that systemstack calls its argument, which
means there are several cases where we fail to detect prohibited write
barriers.
2. It only observes calls in the AST, so calls constructed during
lowering by SSA aren't followed.
This CL completely rewrites this checker to address these issues.
The current checker runs entirely after walk and uses visitBottomUp,
which introduces several problems for checking across systemstack.
First, visitBottomUp itself doesn't understand systemstack calls, so
the callee may be ordered after the caller, causing the checker to
fail to propagate constraints. Second, many systemstack calls are
passed a closure, which is quite difficult to resolve back to the
function definition after transformclosure and walk have run. Third,
visitBottomUp works exclusively on the AST, so it can't observe calls
created by SSA.
To address these problems, this commit splits the check into two
phases and rewrites it to use a call graph generated during SSA
lowering. The first phase runs before transformclosure/walk and simply
records systemstack arguments when they're easy to get. Then, it
modifies genssa to record static call edges at the point where we're
lowering to Progs (which is the latest point at which position
information is conveniently available). Finally, the second phase runs
after all functions have been lowered and uses a direct BFS walk of
the call graph (combining systemstack calls with static calls) to find
prohibited write barriers and construct nice error messages.
Fixes #22384.
For #22460.
Change-Id: I39668f7f2366ab3c1ab1a71eaf25484d25349540
Reviewed-on: https://go-review.googlesource.com/72773
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-10-22 16:36:27 -04:00
|
|
|
var nowritebarrierrecCheck *nowritebarrierrecChecker
|
|
|
|
|
|
2016-11-11 16:56:07 -08:00
|
|
|
// Main parses flags and Go source files specified in the command-line
|
|
|
|
|
// arguments, type-checks the parsed Go package, compiles functions to machine
|
|
|
|
|
// code, and finally writes the compiled package definition to disk.
|
2017-03-17 13:35:31 -07:00
|
|
|
func Main(archInit func(*Arch)) {
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "init")
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
defer hidePanic()
|
|
|
|
|
|
2017-03-17 13:35:36 -07:00
|
|
|
archInit(&thearch)
|
2017-03-17 13:35:31 -07:00
|
|
|
|
2017-03-17 13:35:36 -07:00
|
|
|
Ctxt = obj.Linknew(thearch.LinkArch)
|
2016-09-15 15:45:10 +10:00
|
|
|
Ctxt.DiagFunc = yyerror
|
2017-10-10 17:43:41 -07:00
|
|
|
Ctxt.DiagFlush = flusherrors
|
2016-09-13 14:05:14 -07:00
|
|
|
Ctxt.Bso = bufio.NewWriter(os.Stdout)
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2019-04-02 17:26:49 -04:00
|
|
|
// UseBASEntries is preferred because it shaves about 2% off build time, but LLDB, dsymutil, and dwarfdump
|
|
|
|
|
// on Darwin don't support it properly, especially since macOS 10.14 (Mojave). This is exposed as a flag
|
|
|
|
|
// to allow testing with LLVM tools on Linux, and to help with reporting this bug to the LLVM project.
|
|
|
|
|
// See bugs 31188 and 21945 (CLs 170638, 98075, 72371).
|
|
|
|
|
Ctxt.UseBASEntries = Ctxt.Headtype != objabi.Hdarwin
|
|
|
|
|
|
2017-04-19 10:27:19 -07:00
|
|
|
localpkg = types.NewPkg("", "")
|
2016-03-11 14:28:16 -08:00
|
|
|
localpkg.Prefix = "\"\""
|
|
|
|
|
|
2018-04-05 14:29:32 -07:00
|
|
|
// We won't know localpkg's height until after import
|
|
|
|
|
// processing. In the mean time, set to MaxPkgHeight to ensure
|
|
|
|
|
// height comparisons at least work until then.
|
|
|
|
|
localpkg.Height = types.MaxPkgHeight
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
// pseudo-package, for scoping
|
2017-04-19 10:27:19 -07:00
|
|
|
builtinpkg = types.NewPkg("go.builtin", "") // TODO(gri) name this package go.builtin?
|
|
|
|
|
builtinpkg.Prefix = "go.builtin" // not go%2ebuiltin
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
// pseudo-package, accessed by import "unsafe"
|
2017-04-19 10:27:19 -07:00
|
|
|
unsafepkg = types.NewPkg("unsafe", "unsafe")
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2017-02-28 15:51:29 -08:00
|
|
|
// Pseudo-package that contains the compiler's builtin
|
|
|
|
|
// declarations for package runtime. These are declared in a
|
|
|
|
|
// separate package to avoid conflicts with package runtime's
|
|
|
|
|
// actual declarations, which may differ intentionally but
|
|
|
|
|
// insignificantly.
|
2017-04-19 10:27:19 -07:00
|
|
|
Runtimepkg = types.NewPkg("go.runtime", "runtime")
|
2017-02-28 15:51:29 -08:00
|
|
|
Runtimepkg.Prefix = "runtime"
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
// pseudo-packages used in symbol tables
|
2017-04-19 10:27:19 -07:00
|
|
|
itabpkg = types.NewPkg("go.itab", "go.itab")
|
2016-03-11 14:28:16 -08:00
|
|
|
itabpkg.Prefix = "go.itab" // not go%2eitab
|
|
|
|
|
|
2017-04-19 10:27:19 -07:00
|
|
|
itablinkpkg = types.NewPkg("go.itablink", "go.itablink")
|
2016-03-17 06:18:13 -07:00
|
|
|
itablinkpkg.Prefix = "go.itablink" // not go%2eitablink
|
|
|
|
|
|
2017-04-19 10:27:19 -07:00
|
|
|
trackpkg = types.NewPkg("go.track", "go.track")
|
2016-03-11 14:28:16 -08:00
|
|
|
trackpkg.Prefix = "go.track" // not go%2etrack
|
|
|
|
|
|
2016-04-19 08:31:04 -07:00
|
|
|
// pseudo-package used for map zero values
|
2017-04-19 10:27:19 -07:00
|
|
|
mappkg = types.NewPkg("go.map", "go.map")
|
2016-04-19 08:31:04 -07:00
|
|
|
mappkg.Prefix = "go.map"
|
|
|
|
|
|
2018-04-04 18:42:39 -07:00
|
|
|
// pseudo-package used for methods with anonymous receivers
|
|
|
|
|
gopkg = types.NewPkg("go", "")
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
Wasm := objabi.GOARCH == "wasm"
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2019-06-05 14:53:28 -04:00
|
|
|
// Whether the limit for stack-allocated objects is much smaller than normal.
|
|
|
|
|
// This can be helpful for diagnosing certain causes of GC latency. See #27732.
|
|
|
|
|
smallFrames := false
|
cmd/compile: add framework for logging optimizer (non)actions to LSP
This is intended to allow IDEs to note where the optimizer
was not able to improve users' code. There may be other
applications for this, for example in studying effectiveness
of optimizer changes more quickly than running benchmarks,
or in verifying that code changes did not accidentally disable
optimizations in performance-critical code.
Logging of nilcheck (bad) for amd64 is implemented as
proof-of-concept. In general, the intent is that optimizations
that didn't happen are what will be logged, because that is
believed to be what IDE users want.
Added flag -json=version,dest
Check that version=0. (Future compilers will support a
few recent versions, I hope that version is always <=3.)
Dest is expected to be one of:
/path (or \path in Windows)
will create directory /path and fill it w/ json files
file://path
will create directory path, intended either for
I:\dont\know\enough\about\windows\paths
trustme_I_know_what_I_am_doing_probably_testing
Not passing an absolute path name usually leads to
json splattered all over source directories,
or failure when those directories are not writeable.
If you want a foot-gun, you have to ask for it.
The JSON output is directed to subdirectories of dest,
where each subdirectory is net/url.PathEscape of the
package name, and each for each foo.go in the package,
net/url.PathEscape(foo).json is created. The first line
of foo.json contains version and context information,
and subsequent lines contains LSP-conforming JSON
describing the missing optimizations.
Change-Id: Ib83176a53a8c177ee9081aefc5ae05604ccad8a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/204338
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2019-10-24 13:48:17 -04:00
|
|
|
jsonLogOpt := ""
|
2019-06-05 14:53:28 -04:00
|
|
|
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.BoolVar(&compiling_runtime, "+", false, "compiling runtime")
|
2017-06-06 13:23:59 -07:00
|
|
|
flag.BoolVar(&compiling_std, "std", false, "compiling standard library")
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.StringVar(&localimport, "D", "", "set relative `path` for local imports")
|
2020-10-19 11:31:10 +02:00
|
|
|
|
|
|
|
|
objabi.Flagcount("%", "debug non-static initializers", &Debug.P)
|
|
|
|
|
objabi.Flagcount("B", "disable bounds checking", &Debug.B)
|
|
|
|
|
objabi.Flagcount("C", "disable printing of columns in error messages", &Debug.C)
|
|
|
|
|
objabi.Flagcount("E", "debug symbol export", &Debug.E)
|
|
|
|
|
objabi.Flagcount("K", "debug missing line numbers", &Debug.K)
|
|
|
|
|
objabi.Flagcount("L", "show full file names in error messages", &Debug.L)
|
|
|
|
|
objabi.Flagcount("N", "disable optimizations", &Debug.N)
|
|
|
|
|
objabi.Flagcount("S", "print assembly listing", &Debug.S)
|
|
|
|
|
objabi.Flagcount("W", "debug parse tree after type checking", &Debug.W)
|
|
|
|
|
objabi.Flagcount("e", "no limit on number of errors reported", &Debug.e)
|
|
|
|
|
objabi.Flagcount("h", "halt on error", &Debug.h)
|
|
|
|
|
objabi.Flagcount("j", "debug runtime-initialized variables", &Debug.j)
|
|
|
|
|
objabi.Flagcount("l", "disable inlining", &Debug.l)
|
|
|
|
|
objabi.Flagcount("m", "print optimization decisions", &Debug.m)
|
|
|
|
|
objabi.Flagcount("r", "debug generated wrappers", &Debug.r)
|
|
|
|
|
objabi.Flagcount("w", "debug type checking", &Debug.w)
|
|
|
|
|
|
2017-04-18 12:53:25 -07:00
|
|
|
objabi.Flagfn1("I", "add `directory` to import search path", addidir)
|
2017-10-05 15:37:13 -04:00
|
|
|
objabi.AddVersionFlag() // -V
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.StringVar(&asmhdr, "asmhdr", "", "write assembly header to `file`")
|
|
|
|
|
flag.StringVar(&buildid, "buildid", "", "record `id` as the build id in the export metadata")
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
flag.IntVar(&nBackendWorkers, "c", 1, "concurrency during compilation, 1 means no concurrency")
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.BoolVar(&pure_go, "complete", false, "compiling complete package (no C or assembly)")
|
2017-04-19 19:24:27 +01:00
|
|
|
flag.StringVar(&debugstr, "d", "", "print debug information about items in `list`; try -d help")
|
2018-03-29 00:55:53 +02:00
|
|
|
flag.BoolVar(&flagDWARF, "dwarf", !Wasm, "generate DWARF symbols")
|
2018-03-14 16:54:33 -04:00
|
|
|
flag.BoolVar(&Ctxt.Flag_locationlists, "dwarflocationlists", true, "add location lists to DWARF in optimized mode")
|
2017-10-06 11:32:28 -04:00
|
|
|
flag.IntVar(&genDwarfInline, "gendwarfinl", 2, "generate DWARF inline info records")
|
2020-07-19 00:32:02 -04:00
|
|
|
objabi.Flagfn1("embedcfg", "read go:embed configuration from `file`", readEmbedCfg)
|
2017-04-18 12:53:25 -07:00
|
|
|
objabi.Flagfn1("importmap", "add `definition` of the form source=actual to import map", addImportMap)
|
2017-05-31 11:19:54 -04:00
|
|
|
objabi.Flagfn1("importcfg", "read import configuration from `file`", readImportCfg)
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.StringVar(&flag_installsuffix, "installsuffix", "", "set pkg directory `suffix`")
|
2018-11-13 15:36:02 -08:00
|
|
|
flag.StringVar(&flag_lang, "lang", "", "release to compile for")
|
2016-04-26 21:50:59 -04:00
|
|
|
flag.StringVar(&linkobj, "linkobj", "", "write linker-specific object to `file`")
|
2017-04-18 12:53:25 -07:00
|
|
|
objabi.Flagcount("live", "debug liveness analysis", &debuglive)
|
2018-06-29 21:14:47 -07:00
|
|
|
if sys.MSanSupported(objabi.GOOS, objabi.GOARCH) {
|
|
|
|
|
flag.BoolVar(&flag_msan, "msan", false, "build code compatible with C/C++ memory sanitizer")
|
|
|
|
|
}
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.BoolVar(&nolocalimports, "nolocalimports", false, "reject local (relative) imports")
|
|
|
|
|
flag.StringVar(&outfile, "o", "", "write output to `file`")
|
|
|
|
|
flag.StringVar(&myimportpath, "p", "", "set expected package import `path`")
|
2018-03-22 13:51:13 -07:00
|
|
|
flag.BoolVar(&writearchive, "pack", false, "write to file.a instead of file.o")
|
2018-06-29 21:14:47 -07:00
|
|
|
if sys.RaceDetectorSupported(objabi.GOOS, objabi.GOARCH) {
|
|
|
|
|
flag.BoolVar(&flag_race, "race", false, "enable race detector")
|
|
|
|
|
}
|
2020-01-06 10:31:39 -05:00
|
|
|
flag.StringVar(&spectre, "spectre", spectre, "enable spectre mitigations in `list` (all, index, ret)")
|
2018-11-01 13:46:50 -07:00
|
|
|
if enableTrace {
|
|
|
|
|
flag.BoolVar(&trace, "t", false, "trace type-checking")
|
|
|
|
|
}
|
2016-12-09 17:15:05 -08:00
|
|
|
flag.StringVar(&pathPrefix, "trimpath", "", "remove `prefix` from recorded source file paths")
|
2017-03-20 15:01:20 -07:00
|
|
|
flag.BoolVar(&Debug_vlog, "v", false, "increase debug verbosity")
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.BoolVar(&use_writebarrier, "wb", true, "enable write barrier")
|
|
|
|
|
var flag_shared bool
|
2016-03-11 14:28:16 -08:00
|
|
|
var flag_dynlink bool
|
2017-03-17 13:35:36 -07:00
|
|
|
if supportsDynlink(thearch.LinkArch.Arch) {
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.BoolVar(&flag_shared, "shared", false, "generate code that can be linked into a shared library")
|
2016-04-07 15:31:49 -04:00
|
|
|
flag.BoolVar(&flag_dynlink, "dynlink", false, "support references to Go symbols defined in other shared libraries")
|
[dev.link] cmd: reference symbols by name when linking against Go shared library
When building a program that links against Go shared libraries,
it needs to reference symbols defined in the shared library. At
compile time, we don't know where the shared library boundary is.
If we reference a symbol in package p by index, and package p is
actually part of a shared library, we cannot resolve the index at
link time, as the linker doesn't see the object file of p.
So when linking against Go shared libraries, always use named
reference for now.
To do this, the compiler needs to know whether we will be linking
against Go shared libraries. The -dynlink flag kind of indicates
that (as the document says), but currently it is actually
overloaded: it is also used when building a plugin or a shared
library, which is self-contained (if -linkshared is not otherwise
specified) and could use index for symbol reference. So we
introduce another compiler flag, -linkshared, specifically for
linking against Go shared libraries. The go command will pass
this flag if its -linkshared flag is specified
("go build -linkshared").
There may be better way to handle this. For example, we can
put the symbol indices in a special section in the shared library
that the linker can read. Or we can generate some per-package
description file to include the indices. (Currently we generate
a .shlibname file for each package that is included in a shared
library, which contains the path of the library. We could
consider extending this.) That said, this CL is a stop-gap
solution. And it is no worse than the old object files.
If we were to redesign the build system so that the shared
library boundary is known at compile time, we could use indices
for symbol references that do not cross shared library boundary,
as well as doing other things better.
Change-Id: I9c02aad36518051cc4785dbe25c4b4cef8f3faeb
Reviewed-on: https://go-review.googlesource.com/c/go/+/201818
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
2019-10-16 21:42:18 -04:00
|
|
|
flag.BoolVar(&Ctxt.Flag_linkshared, "linkshared", false, "generate code that will be linked against Go shared libraries")
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2016-04-13 18:37:18 -07:00
|
|
|
flag.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to `file`")
|
|
|
|
|
flag.StringVar(&memprofile, "memprofile", "", "write memory profile to `file`")
|
|
|
|
|
flag.Int64Var(&memprofilerate, "memprofilerate", 0, "set runtime.MemProfileRate to `rate`")
|
2017-05-04 13:58:27 -04:00
|
|
|
var goversion string
|
|
|
|
|
flag.StringVar(&goversion, "goversion", "", "required version of the runtime")
|
2018-10-22 10:10:23 -04:00
|
|
|
var symabisPath string
|
|
|
|
|
flag.StringVar(&symabisPath, "symabis", "", "read symbol ABIs from `file`")
|
2016-08-01 20:34:12 +10:00
|
|
|
flag.StringVar(&traceprofile, "traceprofile", "", "write an execution trace to `file`")
|
2017-04-02 18:37:04 -07:00
|
|
|
flag.StringVar(&blockprofile, "blockprofile", "", "write block profile to `file`")
|
2017-04-05 08:01:33 -07:00
|
|
|
flag.StringVar(&mutexprofile, "mutexprofile", "", "write mutex profile to `file`")
|
2016-06-24 15:03:04 -07:00
|
|
|
flag.StringVar(&benchfile, "bench", "", "append benchmark times to `file`")
|
2019-06-05 14:53:28 -04:00
|
|
|
flag.BoolVar(&smallFrames, "smallframes", false, "reduce the size limit for stack allocated objects")
|
2019-04-02 17:26:49 -04:00
|
|
|
flag.BoolVar(&Ctxt.UseBASEntries, "dwarfbasentries", Ctxt.UseBASEntries, "use base address selection entries in DWARF")
|
cmd/compile: add framework for logging optimizer (non)actions to LSP
This is intended to allow IDEs to note where the optimizer
was not able to improve users' code. There may be other
applications for this, for example in studying effectiveness
of optimizer changes more quickly than running benchmarks,
or in verifying that code changes did not accidentally disable
optimizations in performance-critical code.
Logging of nilcheck (bad) for amd64 is implemented as
proof-of-concept. In general, the intent is that optimizations
that didn't happen are what will be logged, because that is
believed to be what IDE users want.
Added flag -json=version,dest
Check that version=0. (Future compilers will support a
few recent versions, I hope that version is always <=3.)
Dest is expected to be one of:
/path (or \path in Windows)
will create directory /path and fill it w/ json files
file://path
will create directory path, intended either for
I:\dont\know\enough\about\windows\paths
trustme_I_know_what_I_am_doing_probably_testing
Not passing an absolute path name usually leads to
json splattered all over source directories,
or failure when those directories are not writeable.
If you want a foot-gun, you have to ask for it.
The JSON output is directed to subdirectories of dest,
where each subdirectory is net/url.PathEscape of the
package name, and each for each foo.go in the package,
net/url.PathEscape(foo).json is created. The first line
of foo.json contains version and context information,
and subsequent lines contains LSP-conforming JSON
describing the missing optimizations.
Change-Id: Ib83176a53a8c177ee9081aefc5ae05604ccad8a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/204338
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2019-10-24 13:48:17 -04:00
|
|
|
flag.StringVar(&jsonLogOpt, "json", "", "version,destination for JSON compiler/optimizer logging")
|
[dev.link] cmd/compile, cmd/asm: assign index to symbols
We are planning to use indices for symbol references, instead of
symbol names. Here we assign indices to symbols defined in the
package being compiled, and propagate the indices to the
dependent packages in the export data.
A symbol is referenced by a tuple, (package index, symbol index).
Normally, for a given symbol, this index is unique, and the
symbol index is globally consistent (but with exceptions, see
below). The package index is local to a compilation. For example,
when compiling the fmt package, fmt.Println gets assigned index
25, then all packages that reference fmt.Println will refer it
as (X, 25) with some X. X is the index for the fmt package, which
may differ in different compilations.
There are some symbols that do not have clear package affiliation,
such as dupOK symbols and linknamed symbols. We cannot give them
globally consistent indices. We categorize them as non-package
symbols, assign them with package index 1 and a symbol index that
is only meaningful locally.
Currently nothing will consume the indices.
All this is behind a flag, -newobj. The flag needs to be set for
all builds (-gcflags=all=-newobj -asmflags=all=-newobj), or none.
Change-Id: I18e489c531e9a9fbc668519af92c6116b7308cab
Reviewed-on: https://go-review.googlesource.com/c/go/+/196029
Reviewed-by: Than McIntosh <thanm@google.com>
2019-09-11 16:17:01 -04:00
|
|
|
|
2017-04-18 12:53:25 -07:00
|
|
|
objabi.Flagparse(usage)
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2020-07-28 13:49:13 -04:00
|
|
|
Ctxt.Pkgpath = myimportpath
|
|
|
|
|
|
2020-01-06 10:31:39 -05:00
|
|
|
for _, f := range strings.Split(spectre, ",") {
|
|
|
|
|
f = strings.TrimSpace(f)
|
|
|
|
|
switch f {
|
|
|
|
|
default:
|
|
|
|
|
log.Fatalf("unknown setting -spectre=%s", f)
|
|
|
|
|
case "":
|
|
|
|
|
// nothing
|
|
|
|
|
case "all":
|
|
|
|
|
spectreIndex = true
|
2020-01-17 13:54:30 -05:00
|
|
|
Ctxt.Retpoline = true
|
2020-01-06 10:31:39 -05:00
|
|
|
case "index":
|
|
|
|
|
spectreIndex = true
|
2020-01-17 13:54:30 -05:00
|
|
|
case "ret":
|
|
|
|
|
Ctxt.Retpoline = true
|
2020-01-06 10:31:39 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if spectreIndex {
|
|
|
|
|
switch objabi.GOARCH {
|
|
|
|
|
case "amd64":
|
|
|
|
|
// ok
|
|
|
|
|
default:
|
|
|
|
|
log.Fatalf("GOARCH=%s does not support -spectre=index", objabi.GOARCH)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-17 17:09:54 -04:00
|
|
|
// Record flags that affect the build result. (And don't
|
|
|
|
|
// record flags that don't, since that would cause spurious
|
|
|
|
|
// changes in the binary.)
|
2020-05-01 19:13:30 -04:00
|
|
|
recordFlags("B", "N", "l", "msan", "race", "shared", "dynlink", "dwarflocationlists", "dwarfbasentries", "smallframes", "spectre")
|
2019-06-05 14:53:28 -04:00
|
|
|
|
|
|
|
|
if smallFrames {
|
|
|
|
|
maxStackVarSize = 128 * 1024
|
|
|
|
|
maxImplicitStackVarSize = 16 * 1024
|
|
|
|
|
}
|
2017-10-17 17:09:54 -04:00
|
|
|
|
2016-04-13 18:41:59 -07:00
|
|
|
Ctxt.Flag_shared = flag_dynlink || flag_shared
|
2016-03-11 14:28:16 -08:00
|
|
|
Ctxt.Flag_dynlink = flag_dynlink
|
2020-10-19 11:31:10 +02:00
|
|
|
Ctxt.Flag_optimize = Debug.N == 0
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
Ctxt.Debugasm = Debug.S
|
2017-03-20 15:01:20 -07:00
|
|
|
Ctxt.Debugvlog = Debug_vlog
|
2017-04-16 05:53:06 -07:00
|
|
|
if flagDWARF {
|
|
|
|
|
Ctxt.DebugInfo = debuginfo
|
2017-10-06 11:32:28 -04:00
|
|
|
Ctxt.GenAbstractFunc = genAbstractFunc
|
|
|
|
|
Ctxt.DwFixups = obj.NewDwarfFixupTable(Ctxt)
|
|
|
|
|
} else {
|
|
|
|
|
// turn off inline generation if no dwarf at all
|
|
|
|
|
genDwarfInline = 0
|
2018-03-29 00:55:53 +02:00
|
|
|
Ctxt.Flag_locationlists = false
|
2017-04-16 05:53:06 -07:00
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2017-04-19 19:24:27 +01:00
|
|
|
if flag.NArg() < 1 && debugstr != "help" && debugstr != "ssa/help" {
|
2016-03-11 14:28:16 -08:00
|
|
|
usage()
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-04 13:58:27 -04:00
|
|
|
if goversion != "" && goversion != runtime.Version() {
|
|
|
|
|
fmt.Printf("compile: version %q does not match go tool version %q\n", runtime.Version(), goversion)
|
|
|
|
|
Exit(2)
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-24 15:49:32 -07:00
|
|
|
checkLang()
|
|
|
|
|
|
2018-10-22 10:10:23 -04:00
|
|
|
if symabisPath != "" {
|
|
|
|
|
readSymABIs(symabisPath, myimportpath)
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-06 13:42:31 -07:00
|
|
|
thearch.LinkArch.Init(Ctxt)
|
|
|
|
|
|
2017-01-11 15:20:38 -08:00
|
|
|
if outfile == "" {
|
|
|
|
|
p := flag.Arg(0)
|
|
|
|
|
if i := strings.LastIndex(p, "/"); i >= 0 {
|
|
|
|
|
p = p[i+1:]
|
|
|
|
|
}
|
|
|
|
|
if runtime.GOOS == "windows" {
|
|
|
|
|
if i := strings.LastIndex(p, `\`); i >= 0 {
|
|
|
|
|
p = p[i+1:]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if i := strings.LastIndex(p, "."); i >= 0 {
|
|
|
|
|
p = p[:i]
|
|
|
|
|
}
|
|
|
|
|
suffix := ".o"
|
|
|
|
|
if writearchive {
|
|
|
|
|
suffix = ".a"
|
|
|
|
|
}
|
|
|
|
|
outfile = p + suffix
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
startProfile()
|
|
|
|
|
|
2018-03-27 15:35:51 -07:00
|
|
|
if flag_race && flag_msan {
|
|
|
|
|
log.Fatal("cannot use both -race and -msan")
|
|
|
|
|
}
|
2020-04-04 18:36:58 +11:00
|
|
|
if flag_race || flag_msan {
|
|
|
|
|
// -race and -msan imply -d=checkptr for now.
|
2019-10-17 13:24:34 -07:00
|
|
|
Debug_checkptr = 1
|
|
|
|
|
}
|
2018-03-27 15:35:51 -07:00
|
|
|
if ispkgin(omit_pkgs) {
|
|
|
|
|
flag_race = false
|
|
|
|
|
flag_msan = false
|
|
|
|
|
}
|
2016-04-13 18:37:18 -07:00
|
|
|
if flag_race {
|
2018-04-17 14:56:29 -07:00
|
|
|
racepkg = types.NewPkg("runtime/race", "")
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2016-04-13 18:37:18 -07:00
|
|
|
if flag_msan {
|
2018-04-17 14:56:29 -07:00
|
|
|
msanpkg = types.NewPkg("runtime/msan", "")
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2018-03-27 15:35:51 -07:00
|
|
|
if flag_race || flag_msan {
|
2016-03-11 14:28:16 -08:00
|
|
|
instrumenting = true
|
|
|
|
|
}
|
2018-03-27 15:35:51 -07:00
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
if compiling_runtime && Debug.N != 0 {
|
2017-03-20 09:56:50 -07:00
|
|
|
log.Fatal("cannot disable optimizations while compiling runtime")
|
|
|
|
|
}
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
if nBackendWorkers < 1 {
|
|
|
|
|
log.Fatalf("-c must be at least 1, got %d", nBackendWorkers)
|
|
|
|
|
}
|
|
|
|
|
if nBackendWorkers > 1 && !concurrentBackendAllowed() {
|
|
|
|
|
log.Fatalf("cannot use concurrent backend compilation with provided flags; invoked as %v", os.Args)
|
|
|
|
|
}
|
[dev.debug] cmd/compile: better DWARF with optimizations on
Debuggers use DWARF information to find local variables on the
stack and in registers. Prior to this CL, the DWARF information for
functions claimed that all variables were on the stack at all times.
That's incorrect when optimizations are enabled, and results in
debuggers showing data that is out of date or complete gibberish.
After this CL, the compiler is capable of representing variable
locations more accurately, and attempts to do so. Due to limitations of
the SSA backend, it's not possible to be completely correct.
There are a number of problems in the current design. One of the easier
to understand is that variable names currently must be attached to an
SSA value, but not all assignments in the source code actually result
in machine code. For example:
type myint int
var a int
b := myint(int)
and
b := (*uint64)(unsafe.Pointer(a))
don't generate machine code because the underlying representation is the
same, so the correct value of b will not be set when the user would
expect.
Generating the more precise debug information is behind a flag,
dwarflocationlists. Because of the issues described above, setting the
flag may not make the debugging experience much better, and may actually
make it worse in cases where the variable actually is on the stack and
the more complicated analysis doesn't realize it.
A number of changes are included:
- Add a new pseudo-instruction, RegKill, which indicates that the value
in the register has been clobbered.
- Adjust regalloc to emit RegKills in the right places. Significantly,
this means that phis are mixed with StoreReg and RegKills after
regalloc.
- Track variable decomposition in ssa.LocalSlots.
- After the SSA backend is done, analyze the result and build location
lists for each LocalSlot.
- After assembly is done, update the location lists with the assembled
PC offsets, recompose variables, and build DWARF location lists. Emit the
list as a new linker symbol, one per function.
- In the linker, aggregate the location lists into a .debug_loc section.
TODO:
- currently disabled for non-X86/AMD64 because there are no data tables.
go build -toolexec 'toolstash -cmp' -a std succeeds.
With -dwarflocationlists false:
before: f02812195637909ff675782c0b46836a8ff01976
after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec
benchstat -geomean /tmp/220352263 /tmp/621364410
completed 15 of 15, estimated time remaining 0s (eta 3:52PM)
name old time/op new time/op delta
Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14)
Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15)
GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14)
Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15)
GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15)
Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13)
Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15)
XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15)
[Geo mean] 206ms 377ms +82.86%
name old user-time/op new user-time/op delta
Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15)
Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14)
GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15)
Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15)
GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15)
Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13)
Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15)
XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15)
[Geo mean] 317ms 583ms +83.72%
name old alloc/op new alloc/op delta
Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15)
Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15)
GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14)
Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15)
GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15)
Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15)
Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15)
XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15)
[Geo mean] 42.1MB 75.0MB +78.05%
name old allocs/op new allocs/op delta
Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15)
Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14)
GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14)
Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15)
GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15)
Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15)
Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15)
XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15)
[Geo mean] 439k 755k +72.01%
name old text-bytes new text-bytes delta
HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15)
name old data-bytes new data-bytes delta
HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal)
name old bss-bytes new bss-bytes delta
HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal)
name old exe-bytes new exe-bytes delta
HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal)
Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8
Reviewed-on: https://go-review.googlesource.com/41770
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2017-07-21 18:30:19 -04:00
|
|
|
if Ctxt.Flag_locationlists && len(Ctxt.Arch.DWARFRegisters) == 0 {
|
|
|
|
|
log.Fatalf("location lists requested but register mapping not available on %v", Ctxt.Arch.Name)
|
|
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
// parse -d argument
|
|
|
|
|
if debugstr != "" {
|
|
|
|
|
Split:
|
|
|
|
|
for _, name := range strings.Split(debugstr, ",") {
|
|
|
|
|
if name == "" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2017-04-19 19:24:27 +01:00
|
|
|
// display help about the -d option itself and quit
|
|
|
|
|
if name == "help" {
|
2018-06-11 09:11:29 +01:00
|
|
|
fmt.Print(debugHelpHeader)
|
2017-04-19 19:24:27 +01:00
|
|
|
maxLen := len("ssa/help")
|
|
|
|
|
for _, t := range debugtab {
|
|
|
|
|
if len(t.name) > maxLen {
|
|
|
|
|
maxLen = len(t.name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for _, t := range debugtab {
|
|
|
|
|
fmt.Printf("\t%-*s\t%s\n", maxLen, t.name, t.help)
|
|
|
|
|
}
|
|
|
|
|
// ssa options have their own help
|
|
|
|
|
fmt.Printf("\t%-*s\t%s\n", maxLen, "ssa/help", "print help about SSA debugging")
|
2018-06-11 09:11:29 +01:00
|
|
|
fmt.Print(debugHelpFooter)
|
2017-04-19 19:24:27 +01:00
|
|
|
os.Exit(0)
|
|
|
|
|
}
|
2017-02-22 16:13:06 -05:00
|
|
|
val, valstring, haveInt := 1, "", true
|
|
|
|
|
if i := strings.IndexAny(name, "=:"); i >= 0 {
|
2016-03-11 14:28:16 -08:00
|
|
|
var err error
|
2017-02-22 16:13:06 -05:00
|
|
|
name, valstring = name[:i], name[i+1:]
|
|
|
|
|
val, err = strconv.Atoi(valstring)
|
2016-03-11 14:28:16 -08:00
|
|
|
if err != nil {
|
2017-02-22 16:13:06 -05:00
|
|
|
val, haveInt = 1, false
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for _, t := range debugtab {
|
2017-02-22 16:13:06 -05:00
|
|
|
if t.name != name {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
switch vp := t.val.(type) {
|
|
|
|
|
case nil:
|
|
|
|
|
// Ignore
|
|
|
|
|
case *string:
|
|
|
|
|
*vp = valstring
|
|
|
|
|
case *int:
|
|
|
|
|
if !haveInt {
|
|
|
|
|
log.Fatalf("invalid debug value %v", name)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2017-02-22 16:13:06 -05:00
|
|
|
*vp = val
|
|
|
|
|
default:
|
|
|
|
|
panic("bad debugtab type")
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2017-02-22 16:13:06 -05:00
|
|
|
continue Split
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
// special case for ssa for now
|
|
|
|
|
if strings.HasPrefix(name, "ssa/") {
|
|
|
|
|
// expect form ssa/phase/flag
|
|
|
|
|
// e.g. -d=ssa/generic_cse/time
|
|
|
|
|
// _ in phase name also matches space
|
|
|
|
|
phase := name[4:]
|
|
|
|
|
flag := "debug" // default flag is debug
|
2017-10-05 15:49:32 +02:00
|
|
|
if i := strings.Index(phase, "/"); i >= 0 {
|
2016-03-11 14:28:16 -08:00
|
|
|
flag = phase[i+1:]
|
|
|
|
|
phase = phase[:i]
|
|
|
|
|
}
|
2016-05-11 15:25:17 -04:00
|
|
|
err := ssa.PhaseOption(phase, flag, val, valstring)
|
2016-03-11 14:28:16 -08:00
|
|
|
if err != "" {
|
|
|
|
|
log.Fatalf(err)
|
|
|
|
|
}
|
|
|
|
|
continue Split
|
|
|
|
|
}
|
|
|
|
|
log.Fatalf("unknown debug key -d %s\n", name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-12 19:40:42 -08:00
|
|
|
if compiling_runtime {
|
2019-10-18 17:39:39 -07:00
|
|
|
// Runtime can't use -d=checkptr, at least not yet.
|
2019-02-12 19:40:42 -08:00
|
|
|
Debug_checkptr = 0
|
2019-10-18 17:39:39 -07:00
|
|
|
|
|
|
|
|
// Fuzzing the runtime isn't interesting either.
|
|
|
|
|
Debug_libfuzzer = 0
|
2019-02-12 19:40:42 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-17 16:55:40 -05:00
|
|
|
// set via a -d flag
|
|
|
|
|
Ctxt.Debugpcln = Debug_pctab
|
2017-10-06 11:32:28 -04:00
|
|
|
if flagDWARF {
|
|
|
|
|
dwarf.EnableLogging(Debug_gendwarfinl != 0)
|
|
|
|
|
}
|
2017-02-17 16:55:40 -05:00
|
|
|
|
2017-11-10 18:08:48 +01:00
|
|
|
if Debug_softfloat != 0 {
|
|
|
|
|
thearch.SoftFloat = true
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
// enable inlining. for now:
|
2020-10-19 11:31:10 +02:00
|
|
|
// default: inlining on. (Debug.l == 1)
|
|
|
|
|
// -l: inlining off (Debug.l == 0)
|
|
|
|
|
// -l=2, -l=3: inlining on again, with extra debugging (Debug.l > 1)
|
|
|
|
|
if Debug.l <= 1 {
|
|
|
|
|
Debug.l = 1 - Debug.l
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
cmd/compile: add framework for logging optimizer (non)actions to LSP
This is intended to allow IDEs to note where the optimizer
was not able to improve users' code. There may be other
applications for this, for example in studying effectiveness
of optimizer changes more quickly than running benchmarks,
or in verifying that code changes did not accidentally disable
optimizations in performance-critical code.
Logging of nilcheck (bad) for amd64 is implemented as
proof-of-concept. In general, the intent is that optimizations
that didn't happen are what will be logged, because that is
believed to be what IDE users want.
Added flag -json=version,dest
Check that version=0. (Future compilers will support a
few recent versions, I hope that version is always <=3.)
Dest is expected to be one of:
/path (or \path in Windows)
will create directory /path and fill it w/ json files
file://path
will create directory path, intended either for
I:\dont\know\enough\about\windows\paths
trustme_I_know_what_I_am_doing_probably_testing
Not passing an absolute path name usually leads to
json splattered all over source directories,
or failure when those directories are not writeable.
If you want a foot-gun, you have to ask for it.
The JSON output is directed to subdirectories of dest,
where each subdirectory is net/url.PathEscape of the
package name, and each for each foo.go in the package,
net/url.PathEscape(foo).json is created. The first line
of foo.json contains version and context information,
and subsequent lines contains LSP-conforming JSON
describing the missing optimizations.
Change-Id: Ib83176a53a8c177ee9081aefc5ae05604ccad8a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/204338
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2019-10-24 13:48:17 -04:00
|
|
|
if jsonLogOpt != "" { // parse version,destination from json logging optimization.
|
|
|
|
|
logopt.LogJsonOption(jsonLogOpt)
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-24 13:04:35 +03:00
|
|
|
ssaDump = os.Getenv("GOSSAFUNC")
|
2020-08-31 14:29:58 -04:00
|
|
|
ssaDir = os.Getenv("GOSSADIR")
|
2018-10-16 13:03:35 +03:00
|
|
|
if ssaDump != "" {
|
|
|
|
|
if strings.HasSuffix(ssaDump, "+") {
|
|
|
|
|
ssaDump = ssaDump[:len(ssaDump)-1]
|
|
|
|
|
ssaDumpStdout = true
|
|
|
|
|
}
|
|
|
|
|
spl := strings.Split(ssaDump, ":")
|
|
|
|
|
if len(spl) > 1 {
|
|
|
|
|
ssaDump = spl[0]
|
|
|
|
|
ssaDumpCFG = spl[1]
|
|
|
|
|
}
|
2018-07-24 10:39:00 +03:00
|
|
|
}
|
2018-07-24 13:04:35 +03:00
|
|
|
|
2018-03-16 15:15:50 -04:00
|
|
|
trackScopes = flagDWARF
|
2017-05-02 16:46:01 +02:00
|
|
|
|
2017-03-17 13:35:36 -07:00
|
|
|
Widthptr = thearch.LinkArch.PtrSize
|
|
|
|
|
Widthreg = thearch.LinkArch.RegSize
|
2016-03-11 14:28:16 -08:00
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
// initialize types package
|
|
|
|
|
// (we need to do this to break dependencies that otherwise
|
|
|
|
|
// would lead to import cycles)
|
|
|
|
|
types.Widthptr = Widthptr
|
|
|
|
|
types.Dowidth = dowidth
|
|
|
|
|
types.Fatalf = Fatalf
|
|
|
|
|
types.Sconv = func(s *types.Sym, flag, mode int) string {
|
|
|
|
|
return sconv(s, FmtFlag(flag), fmtMode(mode))
|
|
|
|
|
}
|
2020-01-09 14:58:18 -08:00
|
|
|
types.Tconv = func(t *types.Type, flag, mode int) string {
|
|
|
|
|
return tconv(t, FmtFlag(flag), fmtMode(mode))
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
}
|
|
|
|
|
types.FormatSym = func(sym *types.Sym, s fmt.State, verb rune, mode int) {
|
|
|
|
|
symFormat(sym, s, verb, fmtMode(mode))
|
|
|
|
|
}
|
|
|
|
|
types.FormatType = func(t *types.Type, s fmt.State, verb rune, mode int) {
|
|
|
|
|
typeFormat(t, s, verb, fmtMode(mode))
|
|
|
|
|
}
|
|
|
|
|
types.TypeLinkSym = func(t *types.Type) *obj.LSym {
|
2017-04-21 07:51:41 -07:00
|
|
|
return typenamesym(t).Linksym()
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
}
|
|
|
|
|
types.FmtLeft = int(FmtLeft)
|
|
|
|
|
types.FmtUnsigned = int(FmtUnsigned)
|
2019-12-02 09:38:30 +08:00
|
|
|
types.FErr = int(FErr)
|
2017-04-21 07:51:41 -07:00
|
|
|
types.Ctxt = Ctxt
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
|
2016-03-11 15:22:21 -08:00
|
|
|
initUniverse()
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
dclcontext = PEXTERN
|
|
|
|
|
nerrors = 0
|
|
|
|
|
|
2017-03-28 13:52:14 -07:00
|
|
|
autogeneratedPos = makePos(src.NewFileBase("<autogenerated>", "<autogenerated>"), 1, 0)
|
|
|
|
|
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "loadsys")
|
2016-03-11 14:28:16 -08:00
|
|
|
loadsys()
|
|
|
|
|
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "parse")
|
2017-01-11 15:31:48 -08:00
|
|
|
lines := parseFiles(flag.Args())
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Stop()
|
2016-12-09 17:15:05 -08:00
|
|
|
timings.AddEvent(int64(lines), "lines")
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2016-03-11 15:22:21 -08:00
|
|
|
finishUniverse()
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2019-02-25 13:56:18 +01:00
|
|
|
recordPackageName()
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
typecheckok = true
|
|
|
|
|
|
|
|
|
|
// Process top-level declarations in phases.
|
|
|
|
|
|
2018-10-29 12:35:59 -07:00
|
|
|
// Phase 1: const, type, and names and types of funcs.
|
2016-03-11 14:28:16 -08:00
|
|
|
// This will gather all the information about types
|
|
|
|
|
// and methods but doesn't depend on any of it.
|
2018-11-02 23:28:26 -07:00
|
|
|
//
|
|
|
|
|
// We also defer type alias declarations until phase 2
|
|
|
|
|
// to avoid cycles like #18640.
|
|
|
|
|
// TODO(gri) Remove this again once we have a fix for #25838.
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
// Don't use range--typecheck can add closures to xtop.
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "typecheck", "top1")
|
2016-03-11 14:28:16 -08:00
|
|
|
for i := 0; i < len(xtop); i++ {
|
2016-12-29 15:49:01 -08:00
|
|
|
n := xtop[i]
|
2020-07-19 00:32:02 -04:00
|
|
|
if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias()) {
|
2018-11-18 08:34:38 -08:00
|
|
|
xtop[i] = typecheck(n, ctxStmt)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-29 12:35:59 -07:00
|
|
|
// Phase 2: Variable assignments.
|
2018-05-29 18:25:18 +02:00
|
|
|
// To check interface assignments, depends on phase 1.
|
|
|
|
|
|
|
|
|
|
// Don't use range--typecheck can add closures to xtop.
|
2018-10-29 12:35:59 -07:00
|
|
|
timings.Start("fe", "typecheck", "top2")
|
2016-03-11 14:28:16 -08:00
|
|
|
for i := 0; i < len(xtop); i++ {
|
2016-12-29 15:49:01 -08:00
|
|
|
n := xtop[i]
|
2020-07-19 00:32:02 -04:00
|
|
|
if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias() {
|
2018-11-18 08:34:38 -08:00
|
|
|
xtop[i] = typecheck(n, ctxStmt)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Phase 3: Type check function bodies.
|
|
|
|
|
// Don't use range--typecheck can add closures to xtop.
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "typecheck", "func")
|
|
|
|
|
var fcount int64
|
2016-03-11 14:28:16 -08:00
|
|
|
for i := 0; i < len(xtop); i++ {
|
2016-12-29 15:49:01 -08:00
|
|
|
n := xtop[i]
|
2020-09-16 15:41:47 -07:00
|
|
|
if n.Op == ODCLFUNC {
|
2016-12-29 15:49:01 -08:00
|
|
|
Curfn = n
|
2016-03-11 14:28:16 -08:00
|
|
|
decldepth = 1
|
|
|
|
|
saveerrors()
|
2018-11-18 08:34:38 -08:00
|
|
|
typecheckslice(Curfn.Nbody.Slice(), ctxStmt)
|
2016-03-11 14:28:16 -08:00
|
|
|
checkreturn(Curfn)
|
|
|
|
|
if nerrors != 0 {
|
|
|
|
|
Curfn.Nbody.Set(nil) // type errors; do not compile
|
|
|
|
|
}
|
2017-03-27 11:38:20 -07:00
|
|
|
// Now that we've checked whether n terminates,
|
|
|
|
|
// we can eliminate some obviously dead code.
|
|
|
|
|
deadcode(Curfn)
|
2016-06-24 15:03:04 -07:00
|
|
|
fcount++
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
2019-09-08 19:36:13 +03:00
|
|
|
// With all types checked, it's now safe to verify map keys. One single
|
2018-10-07 12:45:56 +01:00
|
|
|
// check past phase 9 isn't sufficient, as we may exit with other errors
|
|
|
|
|
// before then, thus skipping map key errors.
|
2017-11-01 17:38:07 -07:00
|
|
|
checkMapKeys()
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.AddEvent(fcount, "funcs")
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2018-06-05 10:19:03 -07:00
|
|
|
if nsavederrors+nerrors != 0 {
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-14 12:56:37 -07:00
|
|
|
fninit(xtop)
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
// Phase 4: Decide how to capture closed variables.
|
|
|
|
|
// This needs to run before escape analysis,
|
|
|
|
|
// because variables captured by value do not escape.
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "capturevars")
|
2016-03-11 14:28:16 -08:00
|
|
|
for _, n := range xtop {
|
|
|
|
|
if n.Op == ODCLFUNC && n.Func.Closure != nil {
|
|
|
|
|
Curfn = n
|
|
|
|
|
capturevars(n)
|
|
|
|
|
}
|
|
|
|
|
}
|
cmd/compile: don't update outer variables after capturevars is complete
When compiling concurrently, we walk all functions before compiling
any of them. Walking functions can cause variables to switch from
being non-addrtaken to addrtaken, e.g. to prepare for a runtime call.
Typechecking propagates addrtaken-ness of closure variables to
their outer variables, so that capturevars can decide whether to
pass the variable's value or a pointer to it.
When all functions are compiled immediately, as long as the containing
function is compiled prior to the closure, this propagation has no effect.
When compilation is deferred, though, in rare cases, this results in
a change in the addrtaken-ness of a variable in the outer function,
which in turn changes the compiler's output.
(This is rare because in a great many cases, a temporary has been
introduced, insulating the outer variable from modification.)
But concurrent compilation must generate identical results.
To fix this, track whether capturevars has run.
If it has, there is no need to update outer variables
when closure variables change.
Capturevars always runs before any functions are walked or compiled.
The remainder of the changes in this CL are to support the test.
In particular, -d=compilelater forces the compiler to walk all
functions before compiling any of them, despite being non-concurrent.
This is useful because -live is fundamentally incompatible with
concurrent compilation, but we want -c=1 to have no behavior changes.
Fixes #20250
Change-Id: I89bcb54268a41e8588af1ac8cc37fbef856a90c2
Reviewed-on: https://go-review.googlesource.com/42853
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2017-05-05 15:22:59 -07:00
|
|
|
capturevarscomplete = true
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
Curfn = nil
|
|
|
|
|
|
|
|
|
|
if nsavederrors+nerrors != 0 {
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Phase 5: Inlining
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "inlining")
|
2017-10-20 15:20:56 -07:00
|
|
|
if Debug_typecheckinl != 0 {
|
2020-10-19 11:31:10 +02:00
|
|
|
// Typecheck imported function bodies if Debug.l > 1,
|
2016-03-11 14:28:16 -08:00
|
|
|
// otherwise lazily when used or re-exported.
|
|
|
|
|
for _, n := range importlist {
|
2018-04-04 15:53:27 -07:00
|
|
|
if n.Func.Inl != nil {
|
2016-03-11 14:28:16 -08:00
|
|
|
saveerrors()
|
|
|
|
|
typecheckinl(n)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if nsavederrors+nerrors != 0 {
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.l != 0 {
|
2016-03-11 14:28:16 -08:00
|
|
|
// Find functions that can be inlined and clone them before walk expands them.
|
|
|
|
|
visitBottomUp(xtop, func(list []*Node, recursive bool) {
|
2020-03-31 20:24:05 -07:00
|
|
|
numfns := numNonClosures(list)
|
2016-03-23 16:35:50 +01:00
|
|
|
for _, n := range list {
|
2020-03-31 20:24:05 -07:00
|
|
|
if !recursive || numfns > 1 {
|
|
|
|
|
// We allow inlining if there is no
|
|
|
|
|
// recursion, or the recursion cycle is
|
|
|
|
|
// across more than one function.
|
2016-03-11 14:28:16 -08:00
|
|
|
caninl(n)
|
2016-05-03 17:21:32 -07:00
|
|
|
} else {
|
2020-10-19 11:31:10 +02:00
|
|
|
if Debug.m > 1 {
|
2016-05-03 17:21:32 -07:00
|
|
|
fmt.Printf("%v: cannot inline %v: recursive\n", n.Line(), n.Func.Nname)
|
|
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2016-05-05 11:45:27 -07:00
|
|
|
inlcalls(n)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-28 18:49:10 -07:00
|
|
|
for _, n := range xtop {
|
|
|
|
|
if n.Op == ODCLFUNC {
|
|
|
|
|
devirtualize(n)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Curfn = nil
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
// Phase 6: Escape analysis.
|
|
|
|
|
// Required for moving heap allocations onto stack,
|
|
|
|
|
// which in turn is required by the closure implementation,
|
|
|
|
|
// which stores the addresses of stack variables into the closure.
|
|
|
|
|
// If the closure does not escape, it needs to be on the stack
|
|
|
|
|
// or else the stack copier will not update it.
|
|
|
|
|
// Large values are also moved off stack in escape analysis;
|
|
|
|
|
// because large values may contain pointers, it must happen early.
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("fe", "escapes")
|
2016-03-11 14:28:16 -08:00
|
|
|
escapes(xtop)
|
|
|
|
|
|
2018-10-16 12:49:17 -07:00
|
|
|
// Collect information for go:nowritebarrierrec
|
|
|
|
|
// checking. This must happen before transformclosure.
|
|
|
|
|
// We'll do the final check after write barriers are
|
|
|
|
|
// inserted.
|
|
|
|
|
if compiling_runtime {
|
|
|
|
|
nowritebarrierrecCheck = newNowritebarrierrecChecker()
|
|
|
|
|
}
|
cmd/compile: improve coverage of nowritebarrierrec check
The current go:nowritebarrierrec checker has two problems that limit
its coverage:
1. It doesn't understand that systemstack calls its argument, which
means there are several cases where we fail to detect prohibited write
barriers.
2. It only observes calls in the AST, so calls constructed during
lowering by SSA aren't followed.
This CL completely rewrites this checker to address these issues.
The current checker runs entirely after walk and uses visitBottomUp,
which introduces several problems for checking across systemstack.
First, visitBottomUp itself doesn't understand systemstack calls, so
the callee may be ordered after the caller, causing the checker to
fail to propagate constraints. Second, many systemstack calls are
passed a closure, which is quite difficult to resolve back to the
function definition after transformclosure and walk have run. Third,
visitBottomUp works exclusively on the AST, so it can't observe calls
created by SSA.
To address these problems, this commit splits the check into two
phases and rewrites it to use a call graph generated during SSA
lowering. The first phase runs before transformclosure/walk and simply
records systemstack arguments when they're easy to get. Then, it
modifies genssa to record static call edges at the point where we're
lowering to Progs (which is the latest point at which position
information is conveniently available). Finally, the second phase runs
after all functions have been lowered and uses a direct BFS walk of
the call graph (combining systemstack calls with static calls) to find
prohibited write barriers and construct nice error messages.
Fixes #22384.
For #22460.
Change-Id: I39668f7f2366ab3c1ab1a71eaf25484d25349540
Reviewed-on: https://go-review.googlesource.com/72773
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-10-22 16:36:27 -04:00
|
|
|
|
2018-10-16 12:49:17 -07:00
|
|
|
// Phase 7: Transform closure bodies to properly reference captured variables.
|
|
|
|
|
// This needs to happen before walk, because closures must be transformed
|
|
|
|
|
// before walk reaches a call of a closure.
|
|
|
|
|
timings.Start("fe", "xclosures")
|
|
|
|
|
for _, n := range xtop {
|
|
|
|
|
if n.Op == ODCLFUNC && n.Func.Closure != nil {
|
|
|
|
|
Curfn = n
|
|
|
|
|
transformclosure(n)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2018-10-16 12:49:17 -07:00
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2018-10-16 12:49:17 -07:00
|
|
|
// Prepare for SSA compilation.
|
|
|
|
|
// This must be before peekitabs, because peekitabs
|
|
|
|
|
// can trigger function compilation.
|
|
|
|
|
initssaconfig()
|
|
|
|
|
|
|
|
|
|
// Just before compilation, compile itabs found on
|
|
|
|
|
// the right side of OCONVIFACE so that methods
|
|
|
|
|
// can be de-virtualized during compilation.
|
|
|
|
|
Curfn = nil
|
|
|
|
|
peekitabs()
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2018-10-16 12:49:17 -07:00
|
|
|
// Phase 8: Compile top level functions.
|
|
|
|
|
// Don't use range--walk can add functions to xtop.
|
|
|
|
|
timings.Start("be", "compilefuncs")
|
|
|
|
|
fcount = 0
|
|
|
|
|
for i := 0; i < len(xtop); i++ {
|
|
|
|
|
n := xtop[i]
|
|
|
|
|
if n.Op == ODCLFUNC {
|
|
|
|
|
funccompile(n)
|
|
|
|
|
fcount++
|
cmd/compile: add -dolinkobj flag
When set to false, the -dolinkobj flag instructs the compiler
not to generate or emit linker information.
This is handy when you need the compiler's export data,
e.g. for use with go/importer,
but you want to avoid the cost of full compilation.
This must be used with care, since the resulting
files are unusable for linking.
This CL interacts with #18369,
where adding gcflags and ldflags to buildid has been mooted.
On the one hand, adding gcflags would make safe use of this
flag easier, since if the full object files were needed,
a simple 'go install' would fix it.
On the other hand, this would mean that
'go install -gcflags=-dolinkobj=false' would rebuild the object files,
although any existing object files would probably suffice.
Change-Id: I8dc75ab5a40095c785c1a4d2260aeb63c4d10f73
Reviewed-on: https://go-review.googlesource.com/37384
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-22 00:05:18 -08:00
|
|
|
}
|
2018-10-16 12:49:17 -07:00
|
|
|
}
|
|
|
|
|
timings.AddEvent(fcount, "funcs")
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2018-10-16 12:49:17 -07:00
|
|
|
compileFunctions()
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
|
2018-10-16 12:49:17 -07:00
|
|
|
if nowritebarrierrecCheck != nil {
|
|
|
|
|
// Write barriers are now known. Check the
|
|
|
|
|
// call graph.
|
|
|
|
|
nowritebarrierrecCheck.check()
|
|
|
|
|
nowritebarrierrecCheck = nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Finalize DWARF inline routine DIEs, then explicitly turn off
|
|
|
|
|
// DWARF inlining gen so as to avoid problems with generated
|
|
|
|
|
// method wrappers.
|
|
|
|
|
if Ctxt.DwFixups != nil {
|
|
|
|
|
Ctxt.DwFixups.Finalize(myimportpath, Debug_gendwarfinl != 0)
|
|
|
|
|
Ctxt.DwFixups = nil
|
|
|
|
|
genDwarfInline = 0
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Phase 9: Check external declarations.
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("be", "externaldcls")
|
2016-03-11 14:28:16 -08:00
|
|
|
for i, n := range externdcl {
|
|
|
|
|
if n.Op == ONAME {
|
2018-11-18 08:34:38 -08:00
|
|
|
externdcl[i] = typecheck(externdcl[i], ctxExpr)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
2018-10-07 12:45:56 +01:00
|
|
|
// Check the map keys again, since we typechecked the external
|
|
|
|
|
// declarations.
|
|
|
|
|
checkMapKeys()
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
if nerrors+nsavederrors != 0 {
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-11 16:56:07 -08:00
|
|
|
// Write object data to disk.
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Start("be", "dumpobj")
|
2019-09-11 16:03:59 -04:00
|
|
|
dumpdata()
|
2020-05-18 18:47:17 -04:00
|
|
|
Ctxt.NumberSyms()
|
2016-03-11 14:28:16 -08:00
|
|
|
dumpobj()
|
|
|
|
|
if asmhdr != "" {
|
|
|
|
|
dumpasmhdr()
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-29 16:44:36 -07:00
|
|
|
// Check whether any of the functions we have compiled have gigantic stack frames.
|
2019-04-30 15:23:14 -04:00
|
|
|
sort.Slice(largeStackFrames, func(i, j int) bool {
|
2018-10-25 06:23:54 -07:00
|
|
|
return largeStackFrames[i].pos.Before(largeStackFrames[j].pos)
|
2018-05-29 16:44:36 -07:00
|
|
|
})
|
2018-10-25 06:23:54 -07:00
|
|
|
for _, large := range largeStackFrames {
|
|
|
|
|
if large.callee != 0 {
|
|
|
|
|
yyerrorl(large.pos, "stack frame too large (>1GB): %d MB locals + %d MB args + %d MB callee", large.locals>>20, large.args>>20, large.callee>>20)
|
|
|
|
|
} else {
|
|
|
|
|
yyerrorl(large.pos, "stack frame too large (>1GB): %d MB locals + %d MB args", large.locals>>20, large.args>>20)
|
|
|
|
|
}
|
2018-05-29 16:44:36 -07:00
|
|
|
}
|
|
|
|
|
|
2020-09-14 12:53:36 -07:00
|
|
|
if len(funcStack) != 0 {
|
|
|
|
|
Fatalf("funcStack is non-empty: %v", len(funcStack))
|
|
|
|
|
}
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
if len(compilequeue) != 0 {
|
|
|
|
|
Fatalf("%d uncompiled functions", len(compilequeue))
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile: add framework for logging optimizer (non)actions to LSP
This is intended to allow IDEs to note where the optimizer
was not able to improve users' code. There may be other
applications for this, for example in studying effectiveness
of optimizer changes more quickly than running benchmarks,
or in verifying that code changes did not accidentally disable
optimizations in performance-critical code.
Logging of nilcheck (bad) for amd64 is implemented as
proof-of-concept. In general, the intent is that optimizations
that didn't happen are what will be logged, because that is
believed to be what IDE users want.
Added flag -json=version,dest
Check that version=0. (Future compilers will support a
few recent versions, I hope that version is always <=3.)
Dest is expected to be one of:
/path (or \path in Windows)
will create directory /path and fill it w/ json files
file://path
will create directory path, intended either for
I:\dont\know\enough\about\windows\paths
trustme_I_know_what_I_am_doing_probably_testing
Not passing an absolute path name usually leads to
json splattered all over source directories,
or failure when those directories are not writeable.
If you want a foot-gun, you have to ask for it.
The JSON output is directed to subdirectories of dest,
where each subdirectory is net/url.PathEscape of the
package name, and each for each foo.go in the package,
net/url.PathEscape(foo).json is created. The first line
of foo.json contains version and context information,
and subsequent lines contains LSP-conforming JSON
describing the missing optimizations.
Change-Id: Ib83176a53a8c177ee9081aefc5ae05604ccad8a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/204338
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2019-10-24 13:48:17 -04:00
|
|
|
logopt.FlushLoggedOpts(Ctxt, myimportpath)
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
if nerrors+nsavederrors != 0 {
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-15 15:45:10 +10:00
|
|
|
flusherrors()
|
2016-06-24 15:03:04 -07:00
|
|
|
timings.Stop()
|
|
|
|
|
|
|
|
|
|
if benchfile != "" {
|
|
|
|
|
if err := writebench(benchfile); err != nil {
|
|
|
|
|
log.Fatalf("cannot write benchmark data: %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-31 20:24:05 -07:00
|
|
|
// numNonClosures returns the number of functions in list which are not closures.
|
|
|
|
|
func numNonClosures(list []*Node) int {
|
|
|
|
|
count := 0
|
|
|
|
|
for _, n := range list {
|
|
|
|
|
if n.Func.Closure == nil {
|
|
|
|
|
count++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return count
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-24 15:03:04 -07:00
|
|
|
func writebench(filename string) error {
|
|
|
|
|
f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var buf bytes.Buffer
|
2017-04-18 12:53:25 -07:00
|
|
|
fmt.Fprintln(&buf, "commit:", objabi.Version)
|
2016-06-24 15:03:04 -07:00
|
|
|
fmt.Fprintln(&buf, "goos:", runtime.GOOS)
|
|
|
|
|
fmt.Fprintln(&buf, "goarch:", runtime.GOARCH)
|
|
|
|
|
timings.Write(&buf, "BenchmarkCompile:"+myimportpath+":")
|
|
|
|
|
|
|
|
|
|
n, err := f.Write(buf.Bytes())
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if n != buf.Len() {
|
|
|
|
|
panic("bad writer")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return f.Close()
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2017-05-31 11:19:54 -04:00
|
|
|
var (
|
|
|
|
|
importMap = map[string]string{}
|
|
|
|
|
packageFile map[string]string // nil means not in use
|
|
|
|
|
)
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
func addImportMap(s string) {
|
|
|
|
|
if strings.Count(s, "=") != 1 {
|
|
|
|
|
log.Fatal("-importmap argument must be of the form source=actual")
|
|
|
|
|
}
|
2017-10-05 15:49:32 +02:00
|
|
|
i := strings.Index(s, "=")
|
2016-03-11 14:28:16 -08:00
|
|
|
source, actual := s[:i], s[i+1:]
|
|
|
|
|
if source == "" || actual == "" {
|
|
|
|
|
log.Fatal("-importmap argument must be of the form source=actual; source and actual must be non-empty")
|
|
|
|
|
}
|
|
|
|
|
importMap[source] = actual
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-31 11:19:54 -04:00
|
|
|
func readImportCfg(file string) {
|
|
|
|
|
packageFile = map[string]string{}
|
|
|
|
|
data, err := ioutil.ReadFile(file)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("-importcfg: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for lineNum, line := range strings.Split(string(data), "\n") {
|
|
|
|
|
lineNum++ // 1-based
|
|
|
|
|
line = strings.TrimSpace(line)
|
|
|
|
|
if line == "" || strings.HasPrefix(line, "#") {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var verb, args string
|
2017-10-05 15:49:32 +02:00
|
|
|
if i := strings.Index(line, " "); i < 0 {
|
2017-05-31 11:19:54 -04:00
|
|
|
verb = line
|
|
|
|
|
} else {
|
|
|
|
|
verb, args = line[:i], strings.TrimSpace(line[i+1:])
|
|
|
|
|
}
|
|
|
|
|
var before, after string
|
2017-10-05 15:49:32 +02:00
|
|
|
if i := strings.Index(args, "="); i >= 0 {
|
2017-05-31 11:19:54 -04:00
|
|
|
before, after = args[:i], args[i+1:]
|
|
|
|
|
}
|
|
|
|
|
switch verb {
|
|
|
|
|
default:
|
|
|
|
|
log.Fatalf("%s:%d: unknown directive %q", file, lineNum, verb)
|
|
|
|
|
case "importmap":
|
|
|
|
|
if before == "" || after == "" {
|
|
|
|
|
log.Fatalf(`%s:%d: invalid importmap: syntax is "importmap old=new"`, file, lineNum)
|
|
|
|
|
}
|
|
|
|
|
importMap[before] = after
|
|
|
|
|
case "packagefile":
|
|
|
|
|
if before == "" || after == "" {
|
|
|
|
|
log.Fatalf(`%s:%d: invalid packagefile: syntax is "packagefile path=filename"`, file, lineNum)
|
|
|
|
|
}
|
|
|
|
|
packageFile[before] = after
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-22 10:10:23 -04:00
|
|
|
// symabiDefs and symabiRefs record the defined and referenced ABIs of
|
|
|
|
|
// symbols required by non-Go code. These are keyed by link symbol
|
|
|
|
|
// name, where the local package prefix is always `"".`
|
|
|
|
|
var symabiDefs, symabiRefs map[string]obj.ABI
|
|
|
|
|
|
|
|
|
|
// readSymABIs reads a symabis file that specifies definitions and
|
|
|
|
|
// references of text symbols by ABI.
|
|
|
|
|
//
|
|
|
|
|
// The symabis format is a set of lines, where each line is a sequence
|
|
|
|
|
// of whitespace-separated fields. The first field is a verb and is
|
|
|
|
|
// either "def" for defining a symbol ABI or "ref" for referencing a
|
|
|
|
|
// symbol using an ABI. For both "def" and "ref", the second field is
|
|
|
|
|
// the symbol name and the third field is the ABI name, as one of the
|
|
|
|
|
// named cmd/internal/obj.ABI constants.
|
|
|
|
|
func readSymABIs(file, myimportpath string) {
|
|
|
|
|
data, err := ioutil.ReadFile(file)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("-symabis: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
symabiDefs = make(map[string]obj.ABI)
|
|
|
|
|
symabiRefs = make(map[string]obj.ABI)
|
|
|
|
|
|
|
|
|
|
localPrefix := ""
|
|
|
|
|
if myimportpath != "" {
|
|
|
|
|
// Symbols in this package may be written either as
|
|
|
|
|
// "".X or with the package's import path already in
|
|
|
|
|
// the symbol.
|
|
|
|
|
localPrefix = objabi.PathToPrefix(myimportpath) + "."
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for lineNum, line := range strings.Split(string(data), "\n") {
|
|
|
|
|
lineNum++ // 1-based
|
|
|
|
|
line = strings.TrimSpace(line)
|
|
|
|
|
if line == "" || strings.HasPrefix(line, "#") {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
parts := strings.Fields(line)
|
|
|
|
|
switch parts[0] {
|
|
|
|
|
case "def", "ref":
|
|
|
|
|
// Parse line.
|
|
|
|
|
if len(parts) != 3 {
|
|
|
|
|
log.Fatalf(`%s:%d: invalid symabi: syntax is "%s sym abi"`, file, lineNum, parts[0])
|
|
|
|
|
}
|
2020-10-07 12:31:05 -04:00
|
|
|
sym, abistr := parts[1], parts[2]
|
|
|
|
|
abi, valid := obj.ParseABI(abistr)
|
|
|
|
|
if !valid {
|
|
|
|
|
log.Fatalf(`%s:%d: invalid symabi: unknown abi "%s"`, file, lineNum, abistr)
|
2018-10-22 10:10:23 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If the symbol is already prefixed with
|
|
|
|
|
// myimportpath, rewrite it to start with ""
|
|
|
|
|
// so it matches the compiler's internal
|
|
|
|
|
// symbol names.
|
|
|
|
|
if localPrefix != "" && strings.HasPrefix(sym, localPrefix) {
|
|
|
|
|
sym = `"".` + sym[len(localPrefix):]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Record for later.
|
|
|
|
|
if parts[0] == "def" {
|
2020-10-07 12:31:05 -04:00
|
|
|
symabiDefs[sym] = abi
|
2018-10-22 10:10:23 -04:00
|
|
|
} else {
|
2020-10-07 12:31:05 -04:00
|
|
|
symabiRefs[sym] = abi
|
2018-10-22 10:10:23 -04:00
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
log.Fatalf(`%s:%d: invalid symabi type "%s"`, file, lineNum, parts[0])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
func saveerrors() {
|
|
|
|
|
nsavederrors += nerrors
|
|
|
|
|
nerrors = 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func arsize(b *bufio.Reader, name string) int {
|
|
|
|
|
var buf [ArhdrSize]byte
|
|
|
|
|
if _, err := io.ReadFull(b, buf[:]); err != nil {
|
|
|
|
|
return -1
|
|
|
|
|
}
|
|
|
|
|
aname := strings.Trim(string(buf[0:16]), " ")
|
|
|
|
|
if !strings.HasPrefix(aname, name) {
|
|
|
|
|
return -1
|
|
|
|
|
}
|
|
|
|
|
asize := strings.Trim(string(buf[48:58]), " ")
|
|
|
|
|
i, _ := strconv.Atoi(asize)
|
|
|
|
|
return i
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var idirs []string
|
|
|
|
|
|
|
|
|
|
func addidir(dir string) {
|
|
|
|
|
if dir != "" {
|
|
|
|
|
idirs = append(idirs, dir)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func isDriveLetter(b byte) bool {
|
|
|
|
|
return 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z'
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-19 22:33:51 +02:00
|
|
|
// is this path a local name? begins with ./ or ../ or /
|
2016-03-11 14:28:16 -08:00
|
|
|
func islocalname(name string) bool {
|
|
|
|
|
return strings.HasPrefix(name, "/") ||
|
2016-03-21 14:37:57 +11:00
|
|
|
runtime.GOOS == "windows" && len(name) >= 3 && isDriveLetter(name[0]) && name[1] == ':' && name[2] == '/' ||
|
2016-03-11 14:28:16 -08:00
|
|
|
strings.HasPrefix(name, "./") || name == "." ||
|
|
|
|
|
strings.HasPrefix(name, "../") || name == ".."
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func findpkg(name string) (file string, ok bool) {
|
|
|
|
|
if islocalname(name) {
|
2018-10-16 14:02:13 -07:00
|
|
|
if nolocalimports {
|
2016-03-11 14:28:16 -08:00
|
|
|
return "", false
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-31 11:19:54 -04:00
|
|
|
if packageFile != nil {
|
|
|
|
|
file, ok = packageFile[name]
|
|
|
|
|
return file, ok
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
// try .a before .6. important for building libraries:
|
|
|
|
|
// if there is an array.6 in the array.a library,
|
|
|
|
|
// want to find all of array.a, not just array.6.
|
|
|
|
|
file = fmt.Sprintf("%s.a", name)
|
|
|
|
|
if _, err := os.Stat(file); err == nil {
|
|
|
|
|
return file, true
|
|
|
|
|
}
|
|
|
|
|
file = fmt.Sprintf("%s.o", name)
|
|
|
|
|
if _, err := os.Stat(file); err == nil {
|
|
|
|
|
return file, true
|
|
|
|
|
}
|
|
|
|
|
return "", false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// local imports should be canonicalized already.
|
|
|
|
|
// don't want to see "encoding/../encoding/base64"
|
|
|
|
|
// as different from "encoding/base64".
|
|
|
|
|
if q := path.Clean(name); q != name {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("non-canonical import path %q (should be %q)", name, q)
|
2016-03-11 14:28:16 -08:00
|
|
|
return "", false
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-31 11:19:54 -04:00
|
|
|
if packageFile != nil {
|
|
|
|
|
file, ok = packageFile[name]
|
|
|
|
|
return file, ok
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
for _, dir := range idirs {
|
|
|
|
|
file = fmt.Sprintf("%s/%s.a", dir, name)
|
|
|
|
|
if _, err := os.Stat(file); err == nil {
|
|
|
|
|
return file, true
|
|
|
|
|
}
|
|
|
|
|
file = fmt.Sprintf("%s/%s.o", dir, name)
|
|
|
|
|
if _, err := os.Stat(file); err == nil {
|
|
|
|
|
return file, true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-18 12:53:25 -07:00
|
|
|
if objabi.GOROOT != "" {
|
2016-03-11 14:28:16 -08:00
|
|
|
suffix := ""
|
|
|
|
|
suffixsep := ""
|
|
|
|
|
if flag_installsuffix != "" {
|
|
|
|
|
suffixsep = "_"
|
|
|
|
|
suffix = flag_installsuffix
|
2016-04-13 18:37:18 -07:00
|
|
|
} else if flag_race {
|
2016-03-11 14:28:16 -08:00
|
|
|
suffixsep = "_"
|
|
|
|
|
suffix = "race"
|
2016-04-13 18:37:18 -07:00
|
|
|
} else if flag_msan {
|
2016-03-11 14:28:16 -08:00
|
|
|
suffixsep = "_"
|
|
|
|
|
suffix = "msan"
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-18 12:53:25 -07:00
|
|
|
file = fmt.Sprintf("%s/pkg/%s_%s%s%s/%s.a", objabi.GOROOT, objabi.GOOS, objabi.GOARCH, suffixsep, suffix, name)
|
2016-03-11 14:28:16 -08:00
|
|
|
if _, err := os.Stat(file); err == nil {
|
|
|
|
|
return file, true
|
|
|
|
|
}
|
2017-04-18 12:53:25 -07:00
|
|
|
file = fmt.Sprintf("%s/pkg/%s_%s%s%s/%s.o", objabi.GOROOT, objabi.GOOS, objabi.GOARCH, suffixsep, suffix, name)
|
2016-03-11 14:28:16 -08:00
|
|
|
if _, err := os.Stat(file); err == nil {
|
|
|
|
|
return file, true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return "", false
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-18 16:11:50 -07:00
|
|
|
// loadsys loads the definitions for the low-level runtime functions,
|
2016-03-11 14:28:16 -08:00
|
|
|
// so that the compiler can generate calls to them,
|
2016-10-18 16:11:50 -07:00
|
|
|
// but does not make them visible to user code.
|
2016-03-11 14:28:16 -08:00
|
|
|
func loadsys() {
|
2017-04-06 20:57:46 -07:00
|
|
|
types.Block = 1
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2017-03-23 17:39:28 -07:00
|
|
|
inimport = true
|
2016-10-18 16:11:50 -07:00
|
|
|
typecheckok = true
|
|
|
|
|
|
|
|
|
|
typs := runtimeTypes()
|
2019-11-01 18:12:27 +07:00
|
|
|
for _, d := range &runtimeDecls {
|
2017-03-30 13:19:18 -07:00
|
|
|
sym := Runtimepkg.Lookup(d.name)
|
2016-10-18 16:11:50 -07:00
|
|
|
typ := typs[d.typ]
|
|
|
|
|
switch d.tag {
|
|
|
|
|
case funcTag:
|
2018-04-17 14:40:56 -07:00
|
|
|
importfunc(Runtimepkg, src.NoXPos, sym, typ)
|
2016-10-18 16:11:50 -07:00
|
|
|
case varTag:
|
2018-04-17 14:40:56 -07:00
|
|
|
importvar(Runtimepkg, src.NoXPos, sym, typ)
|
2016-10-18 16:11:50 -07:00
|
|
|
default:
|
|
|
|
|
Fatalf("unhandled declaration tag %v", d.tag)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
typecheckok = false
|
2017-03-23 17:39:28 -07:00
|
|
|
inimport = false
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2018-04-05 14:29:32 -07:00
|
|
|
// myheight tracks the local package's height based on packages
|
|
|
|
|
// imported so far.
|
|
|
|
|
var myheight int
|
|
|
|
|
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
func importfile(f *Val) *types.Pkg {
|
2016-03-11 14:28:16 -08:00
|
|
|
path_, ok := f.U.(string)
|
|
|
|
|
if !ok {
|
2017-03-24 11:43:08 -07:00
|
|
|
yyerror("import path must be a string")
|
2017-03-23 17:39:28 -07:00
|
|
|
return nil
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(path_) == 0 {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("import path is empty")
|
2017-03-23 17:39:28 -07:00
|
|
|
return nil
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2017-06-16 14:48:38 -07:00
|
|
|
if isbadimport(path_, false) {
|
2017-03-23 17:39:28 -07:00
|
|
|
return nil
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The package name main is no longer reserved,
|
|
|
|
|
// but we reserve the import path "main" to identify
|
|
|
|
|
// the main package, just as we reserve the import
|
|
|
|
|
// path "math" to identify the standard math package.
|
|
|
|
|
if path_ == "main" {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("cannot import \"main\"")
|
2016-03-11 14:28:16 -08:00
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if myimportpath != "" && path_ == myimportpath {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("import %q while compiling that package (import cycle)", path_)
|
2016-03-11 14:28:16 -08:00
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if mapped, ok := importMap[path_]; ok {
|
|
|
|
|
path_ = mapped
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if path_ == "unsafe" {
|
2017-03-23 17:39:28 -07:00
|
|
|
return unsafepkg
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if islocalname(path_) {
|
|
|
|
|
if path_[0] == '/' {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("import path cannot be absolute path")
|
2017-03-23 17:39:28 -07:00
|
|
|
return nil
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
prefix := Ctxt.Pathname
|
|
|
|
|
if localimport != "" {
|
|
|
|
|
prefix = localimport
|
|
|
|
|
}
|
|
|
|
|
path_ = path.Join(prefix, path_)
|
|
|
|
|
|
2017-06-16 14:48:38 -07:00
|
|
|
if isbadimport(path_, true) {
|
2017-03-23 17:39:28 -07:00
|
|
|
return nil
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
file, found := findpkg(path_)
|
|
|
|
|
if !found {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("can't find import: %q", path_)
|
2016-03-11 14:28:16 -08:00
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-19 10:27:19 -07:00
|
|
|
importpkg := types.NewPkg(path_, "")
|
2016-03-11 14:28:16 -08:00
|
|
|
if importpkg.Imported {
|
2017-03-23 17:39:28 -07:00
|
|
|
return importpkg
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
importpkg.Imported = true
|
|
|
|
|
|
2018-04-17 14:59:13 -07:00
|
|
|
imp, err := bio.Open(file)
|
2016-03-11 14:28:16 -08:00
|
|
|
if err != nil {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("can't open import: %q: %v", path_, err)
|
2016-03-11 14:28:16 -08:00
|
|
|
errorexit()
|
|
|
|
|
}
|
2018-04-17 14:59:13 -07:00
|
|
|
defer imp.Close()
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
// check object header
|
|
|
|
|
p, err := imp.ReadString('\n')
|
|
|
|
|
if err != nil {
|
2017-06-15 13:52:13 -07:00
|
|
|
yyerror("import %s: reading input: %v", file, err)
|
|
|
|
|
errorexit()
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2018-04-17 14:59:13 -07:00
|
|
|
if p == "!<arch>\n" { // package archive
|
2017-06-01 14:26:55 -04:00
|
|
|
// package export block should be first
|
2018-04-17 14:59:13 -07:00
|
|
|
sz := arsize(imp.Reader, "__.PKGDEF")
|
2017-06-01 14:26:55 -04:00
|
|
|
if sz <= 0 {
|
|
|
|
|
yyerror("import %s: not a package file", file)
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
p, err = imp.ReadString('\n')
|
|
|
|
|
if err != nil {
|
2017-06-15 13:52:13 -07:00
|
|
|
yyerror("import %s: reading input: %v", file, err)
|
|
|
|
|
errorexit()
|
2017-06-01 14:26:55 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-17 14:59:13 -07:00
|
|
|
if !strings.HasPrefix(p, "go object ") {
|
|
|
|
|
yyerror("import %s: not a go object file: %s", file, p)
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
q := fmt.Sprintf("%s %s %s %s\n", objabi.GOOS, objabi.GOARCH, objabi.Version, objabi.Expstring())
|
|
|
|
|
if p[10:] != q {
|
|
|
|
|
yyerror("import %s: object is [%s] expected [%s]", file, p[10:], q)
|
|
|
|
|
errorexit()
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2016-05-20 11:19:19 -07:00
|
|
|
// process header lines
|
|
|
|
|
for {
|
|
|
|
|
p, err = imp.ReadString('\n')
|
|
|
|
|
if err != nil {
|
2017-06-15 13:52:13 -07:00
|
|
|
yyerror("import %s: reading input: %v", file, err)
|
|
|
|
|
errorexit()
|
2016-05-20 11:19:19 -07:00
|
|
|
}
|
|
|
|
|
if p == "\n" {
|
|
|
|
|
break // header ends with blank line
|
|
|
|
|
}
|
2016-09-13 15:33:55 -07:00
|
|
|
}
|
2016-05-20 11:19:19 -07:00
|
|
|
|
2016-03-11 14:28:16 -08:00
|
|
|
// In the importfile, if we find:
|
2016-08-16 12:55:17 -07:00
|
|
|
// $$\n (textual format): not supported anymore
|
|
|
|
|
// $$B\n (binary format) : import directly, then feed the lexer a dummy statement
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
// look for $$
|
|
|
|
|
var c byte
|
|
|
|
|
for {
|
|
|
|
|
c, err = imp.ReadByte()
|
|
|
|
|
if err != nil {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if c == '$' {
|
|
|
|
|
c, err = imp.ReadByte()
|
|
|
|
|
if c == '$' || err != nil {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// get character after $$
|
|
|
|
|
if err == nil {
|
|
|
|
|
c, _ = imp.ReadByte()
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-02 19:36:28 -04:00
|
|
|
var fingerprint goobj.FingerprintType
|
2016-03-11 14:28:16 -08:00
|
|
|
switch c {
|
|
|
|
|
case '\n':
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("cannot import %s: old export format no longer supported (recompile library)", path_)
|
2017-03-23 17:39:28 -07:00
|
|
|
return nil
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
case 'B':
|
2016-03-18 17:21:32 -07:00
|
|
|
if Debug_export != 0 {
|
|
|
|
|
fmt.Printf("importing %s (%s)\n", path_, file)
|
|
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
imp.ReadByte() // skip \n after $$B
|
2018-04-01 01:55:55 -07:00
|
|
|
|
|
|
|
|
c, err = imp.ReadByte()
|
|
|
|
|
if err != nil {
|
|
|
|
|
yyerror("import %s: reading input: %v", file, err)
|
|
|
|
|
errorexit()
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-03 10:56:23 -07:00
|
|
|
// Indexed format is distinguished by an 'i' byte,
|
|
|
|
|
// whereas previous export formats started with 'c', 'd', or 'v'.
|
|
|
|
|
if c != 'i' {
|
|
|
|
|
yyerror("import %s: unexpected package format byte: %v", file, c)
|
|
|
|
|
errorexit()
|
2018-04-01 01:55:55 -07:00
|
|
|
}
|
[dev.link] cmd/internal/goobj2: add index fingerprint to object file
The new object files use indices for symbol references, instead
of names. Fundamental to the design, it requires that the
importing and imported packages have consistent view of symbol
indices. The Go command should already ensure this, when using
"go build". But in case it goes wrong, it could lead to obscure
errors like run-time crashes. It would be better to check the
index consistency at build time.
To do that, we add a fingerprint to each object file, which is
a hash of symbol indices. In the object file it records the
fingerprints of all imported packages, as well as its own
fingerprint. At link time, the linker checks that a package's
fingerprint matches the fingerprint recorded in the importing
packages, and issue an error if they don't match.
This CL does the first part: introducing the fingerprint in the
object file, and propagating fingerprints through
importing/exporting by the compiler. It is not yet used by the
linker. Next CL will do.
Change-Id: I0aa372da652e4afb11f2867cb71689a3e3f9966e
Reviewed-on: https://go-review.googlesource.com/c/go/+/229617
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
2020-04-22 19:21:30 -04:00
|
|
|
fingerprint = iimport(importpkg, imp)
|
2016-03-11 14:28:16 -08:00
|
|
|
|
|
|
|
|
default:
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("no import in %q", path_)
|
2016-03-11 14:28:16 -08:00
|
|
|
errorexit()
|
|
|
|
|
}
|
2017-03-23 17:39:28 -07:00
|
|
|
|
[dev.link] cmd/internal/goobj2: add index fingerprint to object file
The new object files use indices for symbol references, instead
of names. Fundamental to the design, it requires that the
importing and imported packages have consistent view of symbol
indices. The Go command should already ensure this, when using
"go build". But in case it goes wrong, it could lead to obscure
errors like run-time crashes. It would be better to check the
index consistency at build time.
To do that, we add a fingerprint to each object file, which is
a hash of symbol indices. In the object file it records the
fingerprints of all imported packages, as well as its own
fingerprint. At link time, the linker checks that a package's
fingerprint matches the fingerprint recorded in the importing
packages, and issue an error if they don't match.
This CL does the first part: introducing the fingerprint in the
object file, and propagating fingerprints through
importing/exporting by the compiler. It is not yet used by the
linker. Next CL will do.
Change-Id: I0aa372da652e4afb11f2867cb71689a3e3f9966e
Reviewed-on: https://go-review.googlesource.com/c/go/+/229617
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
2020-04-22 19:21:30 -04:00
|
|
|
// assume files move (get installed) so don't record the full path
|
|
|
|
|
if packageFile != nil {
|
|
|
|
|
// If using a packageFile map, assume path_ can be recorded directly.
|
|
|
|
|
Ctxt.AddImport(path_, fingerprint)
|
|
|
|
|
} else {
|
|
|
|
|
// For file "/Users/foo/go/pkg/darwin_amd64/math.a" record "math.a".
|
|
|
|
|
Ctxt.AddImport(file[len(file)-len(path_)-len(".a"):], fingerprint)
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-05 14:29:32 -07:00
|
|
|
if importpkg.Height >= myheight {
|
|
|
|
|
myheight = importpkg.Height + 1
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-23 17:39:28 -07:00
|
|
|
return importpkg
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
|
2016-12-15 17:17:01 -08:00
|
|
|
func pkgnotused(lineno src.XPos, path string, name string) {
|
2016-03-11 14:28:16 -08:00
|
|
|
// If the package was imported with a name other than the final
|
|
|
|
|
// import path element, show it explicitly in the error message.
|
|
|
|
|
// Note that this handles both renamed imports and imports of
|
|
|
|
|
// packages containing unconventional package declarations.
|
|
|
|
|
// Note that this uses / always, even on Windows, because Go import
|
|
|
|
|
// paths always use forward slashes.
|
|
|
|
|
elem := path
|
|
|
|
|
if i := strings.LastIndex(elem, "/"); i >= 0 {
|
|
|
|
|
elem = elem[i+1:]
|
|
|
|
|
}
|
|
|
|
|
if name == "" || elem == name {
|
|
|
|
|
yyerrorl(lineno, "imported and not used: %q", path)
|
|
|
|
|
} else {
|
|
|
|
|
yyerrorl(lineno, "imported and not used: %q as %s", path, name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func mkpackage(pkgname string) {
|
|
|
|
|
if localpkg.Name == "" {
|
|
|
|
|
if pkgname == "_" {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("invalid package name _")
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
localpkg.Name = pkgname
|
|
|
|
|
} else {
|
|
|
|
|
if pkgname != localpkg.Name {
|
2016-09-15 15:45:10 +10:00
|
|
|
yyerror("package %s; expected %s", pkgname, localpkg.Name)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2017-01-11 15:20:38 -08:00
|
|
|
}
|
|
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
|
2017-01-11 15:20:38 -08:00
|
|
|
func clearImports() {
|
2017-05-09 08:22:43 -07:00
|
|
|
type importedPkg struct {
|
|
|
|
|
pos src.XPos
|
|
|
|
|
path string
|
|
|
|
|
name string
|
|
|
|
|
}
|
|
|
|
|
var unused []importedPkg
|
|
|
|
|
|
2017-01-11 15:20:38 -08:00
|
|
|
for _, s := range localpkg.Syms {
|
2017-05-09 08:22:43 -07:00
|
|
|
n := asNode(s.Def)
|
|
|
|
|
if n == nil {
|
2017-01-11 15:20:38 -08:00
|
|
|
continue
|
|
|
|
|
}
|
2017-05-09 08:22:43 -07:00
|
|
|
if n.Op == OPACK {
|
|
|
|
|
// throw away top-level package name left over
|
2017-01-11 15:20:38 -08:00
|
|
|
// from previous file.
|
|
|
|
|
// leave s->block set to cause redeclaration
|
|
|
|
|
// errors if a conflicting top-level name is
|
|
|
|
|
// introduced by a different file.
|
2017-05-09 08:22:43 -07:00
|
|
|
if !n.Name.Used() && nsyntaxerrors == 0 {
|
|
|
|
|
unused = append(unused, importedPkg{n.Pos, n.Name.Pkg.Path, s.Name})
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2017-01-11 15:20:38 -08:00
|
|
|
s.Def = nil
|
|
|
|
|
continue
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
if IsAlias(s) {
|
2017-01-11 15:20:38 -08:00
|
|
|
// throw away top-level name left over
|
|
|
|
|
// from previous import . "x"
|
2017-05-09 08:22:43 -07:00
|
|
|
if n.Name != nil && n.Name.Pack != nil && !n.Name.Pack.Name.Used() && nsyntaxerrors == 0 {
|
|
|
|
|
unused = append(unused, importedPkg{n.Name.Pack.Pos, n.Name.Pack.Name.Pkg.Path, ""})
|
|
|
|
|
n.Name.Pack.Name.SetUsed(true)
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
2017-01-11 15:20:38 -08:00
|
|
|
s.Def = nil
|
|
|
|
|
continue
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
|
|
|
|
}
|
2017-05-09 08:22:43 -07:00
|
|
|
|
2019-04-30 15:23:14 -04:00
|
|
|
sort.Slice(unused, func(i, j int) bool { return unused[i].pos.Before(unused[j].pos) })
|
2017-05-09 08:22:43 -07:00
|
|
|
for _, pkg := range unused {
|
|
|
|
|
pkgnotused(pkg.pos, pkg.path, pkg.name)
|
|
|
|
|
}
|
2016-03-11 14:28:16 -08:00
|
|
|
}
|
cmd/compile: factor out Pkg, Sym, and Type into package types
- created new package cmd/compile/internal/types
- moved Pkg, Sym, Type to new package
- to break cycles, for now we need the (ugly) types/utils.go
file which contains a handful of functions that must be installed
early by the gc frontend
- to break cycles, for now we need two functions to convert between
*gc.Node and *types.Node (the latter is a dummy type)
- adjusted the gc's code to use the new package and the conversion
functions as needed
- made several Pkg, Sym, and Type methods functions as needed
- renamed constructors typ, typPtr, typArray, etc. to types.New,
types.NewPtr, types.NewArray, etc.
Passes toolstash-check -all.
Change-Id: I8adfa5e85c731645d0a7fd2030375ed6ebf54b72
Reviewed-on: https://go-review.googlesource.com/39855
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-04 17:54:02 -07:00
|
|
|
|
|
|
|
|
func IsAlias(sym *types.Sym) bool {
|
|
|
|
|
return sym.Def != nil && asNode(sym.Def).Sym != sym
|
|
|
|
|
}
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
|
2020-10-19 11:31:10 +02:00
|
|
|
// By default, assume any debug flags are incompatible with concurrent
|
|
|
|
|
// compilation. A few are safe and potentially in common use for
|
|
|
|
|
// normal compiles, though; return true for those.
|
|
|
|
|
func concurrentFlagOk() bool {
|
|
|
|
|
// Report whether any debug flag that would prevent concurrent
|
|
|
|
|
// compilation is set, by zeroing out the allowed ones and then
|
|
|
|
|
// checking if the resulting struct is zero.
|
|
|
|
|
d := Debug
|
|
|
|
|
d.B = 0 // disable bounds checking
|
|
|
|
|
d.C = 0 // disable printing of columns in error messages
|
|
|
|
|
d.e = 0 // no limit on errors; errors all come from non-concurrent code
|
|
|
|
|
d.N = 0 // disable optimizations
|
|
|
|
|
d.l = 0 // disable inlining
|
|
|
|
|
d.w = 0 // all printing happens before compilation
|
|
|
|
|
d.W = 0 // all printing happens before compilation
|
|
|
|
|
d.S = 0 // printing disassembly happens at the end (but see concurrentBackendAllowed below)
|
|
|
|
|
|
|
|
|
|
return d == DebugFlags{}
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func concurrentBackendAllowed() bool {
|
2020-10-19 11:31:10 +02:00
|
|
|
if !concurrentFlagOk() {
|
|
|
|
|
return false
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
}
|
2020-10-19 11:31:10 +02:00
|
|
|
|
|
|
|
|
// Debug.S by itself is ok, because all printing occurs
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
// while writing the object file, and that is non-concurrent.
|
2020-10-19 11:31:10 +02:00
|
|
|
// Adding Debug_vlog, however, causes Debug.S to also print
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
// while flushing the plist, which happens concurrently.
|
|
|
|
|
if Debug_vlog || debugstr != "" || debuglive > 0 {
|
|
|
|
|
return false
|
|
|
|
|
}
|
2018-11-26 10:28:44 -08:00
|
|
|
// TODO: Test and delete this condition.
|
|
|
|
|
if objabi.Fieldtrack_enabled != 0 {
|
cmd/compile: add initial backend concurrency support
This CL adds initial support for concurrent backend compilation.
BACKGROUND
The compiler currently consists (very roughly) of the following phases:
1. Initialization.
2. Lexing and parsing into the cmd/compile/internal/syntax AST.
3. Translation into the cmd/compile/internal/gc AST.
4. Some gc AST passes: typechecking, escape analysis, inlining,
closure handling, expression evaluation ordering (order.go),
and some lowering and optimization (walk.go).
5. Translation into the cmd/compile/internal/ssa SSA form.
6. Optimization and lowering of SSA form.
7. Translation from SSA form to assembler instructions.
8. Translation from assembler instructions to machine code.
9. Writing lots of output: machine code, DWARF symbols,
type and reflection info, export data.
Phase 2 was already concurrent as of Go 1.8.
Phase 3 is planned for eventual removal;
we hope to go straight from syntax AST to SSA.
Phases 5–8 are per-function; this CL adds support for
processing multiple functions concurrently.
The slowest phases in the compiler are 5 and 6,
so this offers the opportunity for some good speed-ups.
Unfortunately, it's not quite that straightforward.
In the current compiler, the latter parts of phase 4
(order, walk) are done function-at-a-time as needed.
Making order and walk concurrency-safe proved hard,
and they're not particularly slow, so there wasn't much reward.
To enable phases 5–8 to be done concurrently,
when concurrent backend compilation is requested,
we complete phase 4 for all functions
before starting later phases for any functions.
Also, in reality, we automatically generate new
functions in phase 9, such as method wrappers
and equality and has routines.
Those new functions then go through phases 4–8.
This CL disables concurrent backend compilation
after the first, big, user-provided batch of
functions has been compiled.
This is done to keep things simple,
and because the autogenerated functions
tend to be small, few, simple, and fast to compile.
USAGE
Concurrent backend compilation still defaults to off.
To set the number of functions that may be backend-compiled
concurrently, use the compiler flag -c.
In future work, cmd/go will automatically set -c.
Furthermore, this CL has been intentionally written
so that the c=1 path has no backend concurrency whatsoever,
not even spawning any goroutines.
This helps ensure that, should problems arise
late in the development cycle,
we can simply have cmd/go set c=1 always,
and revert to the original compiler behavior.
MUTEXES
Most of the work required to make concurrent backend
compilation safe has occurred over the past month.
This CL adds a handful of mutexes to get the rest of the way there;
they are the mutexes that I didn't see a clean way to avoid.
Some of them may still be eliminable in future work.
In no particular order:
* gc.funcsymsmu. The global funcsyms slice is populated
lazily when we need function symbols for closures.
This occurs during gc AST to SSA translation.
The function funcsym also does a package lookup,
which is a source of races on types.Pkg.Syms;
funcsymsmu also covers that package lookup.
This mutex is low priority: it adds a single global,
it is in an infrequently used code path, and it is low contention.
Since funcsyms may now be added in any order,
we must sort them to preserve reproducible builds.
* gc.largeStackFramesMu. We don't discover until after SSA compilation
that a function's stack frame is gigantic.
Recording that error happens basically never,
but it does happen concurrently.
Fix with a low priority mutex and sorting.
* obj.Link.hashmu. ctxt.hash stores the mapping from
types.Syms (compiler symbols) to obj.LSyms (linker symbols).
It is accessed fairly heavily through all the phases.
This is the only heavily contended mutex.
* gc.signatlistmu. The global signatlist map is
populated with types through several of the concurrent phases,
including notably via ngotype during DWARF generation.
It is low priority for removal.
* gc.typepkgmu. Looking up symbols in the types package
happens a fair amount during backend compilation
and DWARF generation, particularly via ngotype.
This mutex helps us to avoid a broader mutex on types.Pkg.Syms.
It has low-to-moderate contention.
* types.internedStringsmu. gc AST to SSA conversion and
some SSA work introduce new autotmps.
Those autotmps have their names interned to reduce allocations.
That interning requires protecting types.internedStrings.
The autotmp names are heavily re-used, and the mutex
overhead and contention here are low, so it is probably
a worthwhile performance optimization to keep this mutex.
TESTING
I have been testing this code locally by running
'go install -race cmd/compile'
and then doing
'go build -a -gcflags=-c=128 std cmd'
for all architectures and a variety of compiler flags.
This obviously needs to be made part of the builders,
but it is too expensive to make part of all.bash.
I have filed #19962 for this.
REPRODUCIBLE BUILDS
This version of the compiler generates reproducible builds.
Testing reproducible builds also needs automation, however,
and is also too expensive for all.bash.
This is #19961.
Also of note is that some of the compiler flags used by 'toolstash -cmp'
are currently incompatible with concurrent backend compilation.
They still work fine with c=1.
Time will tell whether this is a problem.
NEXT STEPS
* Continue to find and fix races and bugs,
using a combination of code inspection, fuzzing,
and hopefully some community experimentation.
I do not know of any outstanding races,
but there probably are some.
* Improve testing.
* Improve performance, for many values of c.
* Integrate with cmd/go and fine tune.
* Support concurrent compilation with the -race flag.
It is a sad irony that it does not yet work.
* Minor code cleanup that has been deferred during
the last month due to uncertainty about the
ultimate shape of this CL.
PERFORMANCE
Here's the buried lede, at last. :)
All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop.
First, going from tip to this CL with c=1 has almost no impact.
name old time/op new time/op delta
Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29)
Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30)
GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28)
Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30)
SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29)
Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30)
GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28)
Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29)
Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29)
XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28)
[Geo mean] 332ms 333ms +0.10%
name old user-time/op new user-time/op delta
Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27)
Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29)
GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29)
Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30)
SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29)
Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27)
GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30)
Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30)
Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29)
XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29)
[Geo mean] 415ms 415ms +0.02%
name old obj-bytes new obj-bytes delta
Template 382k ± 0% 382k ± 0% ~ (all equal)
Unicode 203k ± 0% 203k ± 0% ~ (all equal)
GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal)
Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal)
SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal)
Flate 230k ± 0% 230k ± 0% ~ (all equal)
GoParser 287k ± 0% 287k ± 0% ~ (all equal)
Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal)
Tar 190k ± 0% 190k ± 0% ~ (all equal)
XML 416k ± 0% 416k ± 0% ~ (all equal)
[Geo mean] 660k 660k +0.00%
Comparing this CL to itself, from c=1 to c=2
improves real times 20-30%, costs 5-10% more CPU time,
and adds about 2% alloc.
The allocation increase comes from allocating more ssa.Caches.
name old time/op new time/op delta
Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49)
Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48)
GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49)
Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46)
SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49)
Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48)
GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48)
Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49)
Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47)
XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48)
Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50)
GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50)
Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48)
SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47)
Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47)
GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46)
Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48)
Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49)
XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5)
Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5)
XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5)
Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5)
Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5)
SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5)
Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5)
GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5)
Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5)
Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5)
XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5)
From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%:
name old time/op new time/op delta
Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50)
Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47)
GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49)
Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46)
SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49)
Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48)
GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50)
Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49)
Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49)
XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48)
name old user-time/op new user-time/op delta
Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50)
Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50)
GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50)
Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49)
SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50)
Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48)
GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50)
Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48)
Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50)
XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50)
name old alloc/op new alloc/op delta
Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5)
GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5)
Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5)
SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5)
Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5)
GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5)
Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5)
Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5)
XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5)
Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5)
GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5)
Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5)
SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5)
Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5)
GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5)
Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5)
Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5)
XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5)
Going beyond c=4 on my machine tends to increase CPU time and allocs
without impacting real time.
The CPU time numbers matter, because when there are many concurrent
compilation processes, that will impact the overall throughput.
The numbers above are in many ways the best case scenario;
we can take full advantage of all cores.
Fortunately, the most common compilation scenario is incremental
re-compilation of a single package during a build/test cycle.
Updates #15756
Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea
Reviewed-on: https://go-review.googlesource.com/40693
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
// TODO: fix races and enable the following flags
|
|
|
|
|
if Ctxt.Flag_shared || Ctxt.Flag_dynlink || flag_race {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
2017-10-17 17:09:54 -04:00
|
|
|
|
|
|
|
|
// recordFlags records the specified command-line flags to be placed
|
|
|
|
|
// in the DWARF info.
|
|
|
|
|
func recordFlags(flags ...string) {
|
|
|
|
|
if myimportpath == "" {
|
|
|
|
|
// We can't record the flags if we don't know what the
|
|
|
|
|
// package name is.
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type BoolFlag interface {
|
|
|
|
|
IsBoolFlag() bool
|
|
|
|
|
}
|
|
|
|
|
type CountFlag interface {
|
|
|
|
|
IsCountFlag() bool
|
|
|
|
|
}
|
|
|
|
|
var cmd bytes.Buffer
|
|
|
|
|
for _, name := range flags {
|
|
|
|
|
f := flag.Lookup(name)
|
|
|
|
|
if f == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
getter := f.Value.(flag.Getter)
|
|
|
|
|
if getter.String() == f.DefValue {
|
|
|
|
|
// Flag has default value, so omit it.
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if bf, ok := f.Value.(BoolFlag); ok && bf.IsBoolFlag() {
|
|
|
|
|
val, ok := getter.Get().(bool)
|
|
|
|
|
if ok && val {
|
|
|
|
|
fmt.Fprintf(&cmd, " -%s", f.Name)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if cf, ok := f.Value.(CountFlag); ok && cf.IsCountFlag() {
|
|
|
|
|
val, ok := getter.Get().(int)
|
|
|
|
|
if ok && val == 1 {
|
|
|
|
|
fmt.Fprintf(&cmd, " -%s", f.Name)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintf(&cmd, " -%s=%v", f.Name, getter.Get())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if cmd.Len() == 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
s := Ctxt.Lookup(dwarf.CUInfoPrefix + "producer." + myimportpath)
|
2020-05-20 13:51:59 -04:00
|
|
|
s.Type = objabi.SDWARFCUINFO
|
2017-10-18 20:14:29 -04:00
|
|
|
// Sometimes (for example when building tests) we can link
|
|
|
|
|
// together two package main archives. So allow dups.
|
|
|
|
|
s.Set(obj.AttrDuplicateOK, true)
|
2017-10-17 17:09:54 -04:00
|
|
|
Ctxt.Data = append(Ctxt.Data, s)
|
|
|
|
|
s.P = cmd.Bytes()[1:]
|
|
|
|
|
}
|
2018-10-24 15:49:32 -07:00
|
|
|
|
2019-02-25 13:56:18 +01:00
|
|
|
// recordPackageName records the name of the package being
|
|
|
|
|
// compiled, so that the linker can save it in the compile unit's DIE.
|
|
|
|
|
func recordPackageName() {
|
|
|
|
|
s := Ctxt.Lookup(dwarf.CUInfoPrefix + "packagename." + myimportpath)
|
2020-05-20 13:51:59 -04:00
|
|
|
s.Type = objabi.SDWARFCUINFO
|
2019-02-25 13:56:18 +01:00
|
|
|
// Sometimes (for example when building tests) we can link
|
|
|
|
|
// together two package main archives. So allow dups.
|
|
|
|
|
s.Set(obj.AttrDuplicateOK, true)
|
|
|
|
|
Ctxt.Data = append(Ctxt.Data, s)
|
|
|
|
|
s.P = []byte(localpkg.Name)
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-24 15:49:32 -07:00
|
|
|
// flag_lang is the language version we are compiling for, set by the -lang flag.
|
|
|
|
|
var flag_lang string
|
|
|
|
|
|
2018-11-13 15:36:02 -08:00
|
|
|
// currentLang returns the current language version.
|
|
|
|
|
func currentLang() string {
|
2019-04-19 16:09:17 +00:00
|
|
|
return fmt.Sprintf("go1.%d", goversion.Version)
|
2018-10-24 15:49:32 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// goVersionRE is a regular expression that matches the valid
|
|
|
|
|
// arguments to the -lang flag.
|
|
|
|
|
var goVersionRE = regexp.MustCompile(`^go([1-9][0-9]*)\.(0|[1-9][0-9]*)$`)
|
|
|
|
|
|
|
|
|
|
// A lang is a language version broken into major and minor numbers.
|
|
|
|
|
type lang struct {
|
|
|
|
|
major, minor int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// langWant is the desired language version set by the -lang flag.
|
2018-11-13 15:36:02 -08:00
|
|
|
// If the -lang flag is not set, this is the zero value, meaning that
|
|
|
|
|
// any language version is supported.
|
2018-10-24 15:49:32 -07:00
|
|
|
var langWant lang
|
|
|
|
|
|
2019-11-07 15:54:59 -08:00
|
|
|
// langSupported reports whether language version major.minor is
|
|
|
|
|
// supported in a particular package.
|
|
|
|
|
func langSupported(major, minor int, pkg *types.Pkg) bool {
|
|
|
|
|
if pkg == nil {
|
|
|
|
|
// TODO(mdempsky): Set Pkg for local types earlier.
|
|
|
|
|
pkg = localpkg
|
|
|
|
|
}
|
|
|
|
|
if pkg != localpkg {
|
|
|
|
|
// Assume imported packages passed type-checking.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-13 15:36:02 -08:00
|
|
|
if langWant.major == 0 && langWant.minor == 0 {
|
|
|
|
|
return true
|
|
|
|
|
}
|
2018-10-24 15:49:32 -07:00
|
|
|
return langWant.major > major || (langWant.major == major && langWant.minor >= minor)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// checkLang verifies that the -lang flag holds a valid value, and
|
|
|
|
|
// exits if not. It initializes data used by langSupported.
|
|
|
|
|
func checkLang() {
|
2018-11-13 15:36:02 -08:00
|
|
|
if flag_lang == "" {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-24 15:49:32 -07:00
|
|
|
var err error
|
|
|
|
|
langWant, err = parseLang(flag_lang)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("invalid value %q for -lang: %v", flag_lang, err)
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-13 15:36:02 -08:00
|
|
|
if def := currentLang(); flag_lang != def {
|
2018-10-24 15:49:32 -07:00
|
|
|
defVers, err := parseLang(def)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("internal error parsing default lang %q: %v", def, err)
|
|
|
|
|
}
|
2018-11-12 10:38:02 -08:00
|
|
|
if langWant.major > defVers.major || (langWant.major == defVers.major && langWant.minor > defVers.minor) {
|
2018-10-24 15:49:32 -07:00
|
|
|
log.Fatalf("invalid value %q for -lang: max known version is %q", flag_lang, def)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// parseLang parses a -lang option into a langVer.
|
|
|
|
|
func parseLang(s string) (lang, error) {
|
|
|
|
|
matches := goVersionRE.FindStringSubmatch(s)
|
|
|
|
|
if matches == nil {
|
|
|
|
|
return lang{}, fmt.Errorf(`should be something like "go1.12"`)
|
|
|
|
|
}
|
|
|
|
|
major, err := strconv.Atoi(matches[1])
|
|
|
|
|
if err != nil {
|
|
|
|
|
return lang{}, err
|
|
|
|
|
}
|
|
|
|
|
minor, err := strconv.Atoi(matches[2])
|
|
|
|
|
if err != nil {
|
|
|
|
|
return lang{}, err
|
|
|
|
|
}
|
|
|
|
|
return lang{major: major, minor: minor}, nil
|
|
|
|
|
}
|