go/src/cmd/internal/obj/sym.go

315 lines
8.5 KiB
Go
Raw Normal View History

// Derived from Inferno utils/6l/obj.c and utils/6l/span.c
// https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/obj.c
// https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
package obj
import (
"cmd/internal/goobj2"
"cmd/internal/objabi"
"fmt"
"log"
"math"
"sort"
)
func Linknew(arch *LinkArch) *Link {
ctxt := new(Link)
cmd/internal/obj: split Link.hash into version 0 and 1 Though LSym.Version is an int, it can only have the value 0 or 1. Using that, split Link.hash into two maps, one for version 0 (which is far more common) and one for version 1. This lets use just the name for lookups, which is both faster and more compact. This matters because Link.hash map lookups are frequent, and will be contended once the backend is concurrent. name old time/op new time/op delta Template 194ms ± 3% 192ms ± 5% -1.46% (p=0.000 n=47+49) Unicode 84.5ms ± 3% 83.8ms ± 3% -0.81% (p=0.011 n=50+49) GoTypes 543ms ± 2% 545ms ± 4% ~ (p=0.566 n=46+49) Compiler 2.48s ± 2% 2.48s ± 3% ~ (p=0.706 n=47+50) SSA 5.94s ± 3% 5.98s ± 2% +0.55% (p=0.040 n=49+50) Flate 119ms ± 6% 119ms ± 4% ~ (p=0.681 n=48+47) GoParser 145ms ± 4% 145ms ± 3% ~ (p=0.662 n=47+49) Reflect 348ms ± 3% 344ms ± 3% -1.17% (p=0.000 n=47+47) Tar 105ms ± 4% 104ms ± 3% ~ (p=0.155 n=50+47) XML 197ms ± 2% 197ms ± 3% ~ (p=0.666 n=49+49) [Geo mean] 332ms 331ms -0.37% name old user-time/op new user-time/op delta Template 230ms ±10% 226ms ±10% -1.85% (p=0.041 n=50+50) Unicode 104ms ± 6% 103ms ± 5% ~ (p=0.076 n=49+49) GoTypes 707ms ± 4% 705ms ± 5% ~ (p=0.521 n=50+50) Compiler 3.30s ± 3% 3.33s ± 4% +0.76% (p=0.003 n=50+49) SSA 8.17s ± 4% 8.23s ± 3% +0.66% (p=0.030 n=50+49) Flate 139ms ± 6% 138ms ± 8% ~ (p=0.184 n=49+48) GoParser 174ms ± 5% 172ms ± 6% ~ (p=0.107 n=48+49) Reflect 431ms ± 8% 420ms ± 5% -2.57% (p=0.000 n=50+46) Tar 119ms ± 6% 118ms ± 7% -0.95% (p=0.033 n=50+49) XML 236ms ± 4% 236ms ± 4% ~ (p=0.935 n=50+48) [Geo mean] 410ms 407ms -0.67% name old alloc/op new alloc/op delta Template 38.7MB ± 0% 38.6MB ± 0% -0.29% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 29.7MB ± 0% -0.24% (p=0.008 n=5+5) GoTypes 113MB ± 0% 113MB ± 0% -0.29% (p=0.008 n=5+5) Compiler 462MB ± 0% 462MB ± 0% -0.12% (p=0.008 n=5+5) SSA 1.27GB ± 0% 1.27GB ± 0% -0.05% (p=0.008 n=5+5) Flate 25.2MB ± 0% 25.1MB ± 0% -0.37% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 31.6MB ± 0% ~ (p=0.056 n=5+5) Reflect 77.5MB ± 0% 77.2MB ± 0% -0.38% (p=0.008 n=5+5) Tar 26.4MB ± 0% 26.3MB ± 0% ~ (p=0.151 n=5+5) XML 41.9MB ± 0% 41.9MB ± 0% -0.20% (p=0.032 n=5+5) [Geo mean] 74.5MB 74.3MB -0.23% name old allocs/op new allocs/op delta Template 378k ± 1% 377k ± 1% ~ (p=0.690 n=5+5) Unicode 321k ± 0% 322k ± 0% ~ (p=0.595 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.310 n=5+5) Compiler 4.25M ± 0% 4.25M ± 0% ~ (p=0.151 n=5+5) SSA 9.84M ± 0% 9.84M ± 0% ~ (p=0.841 n=5+5) Flate 232k ± 1% 232k ± 0% ~ (p=0.690 n=5+5) GoParser 315k ± 1% 315k ± 1% ~ (p=0.841 n=5+5) Reflect 970k ± 0% 970k ± 0% ~ (p=0.841 n=5+5) Tar 248k ± 0% 248k ± 1% ~ (p=0.841 n=5+5) XML 389k ± 0% 389k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 724k 724k +0.01% Updates #15756 Change-Id: I2646332e89f0444ca9d5a41d7172537d904ed636 Reviewed-on: https://go-review.googlesource.com/41050 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-17 13:52:01 -07:00
ctxt.hash = make(map[string]*LSym)
cmd/compile: separate data and function LSyms Currently, obj.Ctxt's symbol table does not distinguish between ABI0 and ABIInternal symbols. This is *almost* okay, since a given symbol name in the final object file is only going to belong to one ABI or the other, but it requires that the compiler mark a Sym as being a function symbol before it retrieves its LSym. If it retrieves the LSym first, that LSym will be created as ABI0, and later marking the Sym as a function symbol won't change the LSym's ABI. Marking a Sym as a function symbol before looking up its LSym sounds easy, except Syms have a dual purpose: they are used just as interned strings (every function, variable, parameter, etc with the same textual name shares a Sym), and *also* to store state for whatever package global has that name. As a result, it's easy to slip up and look up an LSym when a Sym is serving as the name of a local variable, and then later mark it as a function when it's serving as the global with the name. In general, we were careful to avoid this, but #29610 demonstrates one case where we messed up. Because of on-demand importing from indexed export data, it's possible to compile a method wrapper for a type imported from another package before importing an init function from that package. If the argument of the method is named "init", the "init" LSym will be created as a data symbol when compiling the wrapper, before it gets marked as a function symbol. To fix this, we separate obj.Ctxt's symbol tables for ABI0 and ABIInternal symbols. This way, the compiler will simply get a different LSym once the Sym takes on its package-global meaning as a function. This fixes the above ordering issue, and means we no longer need to go out of our way to create the "init" function early and mark it as a function symbol. Fixes #29610. Updates #27539. Change-Id: Id9458b40017893d46ef9e4a3f9b47fc49e1ce8df Reviewed-on: https://go-review.googlesource.com/c/157017 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2019-01-08 22:23:52 -05:00
ctxt.funchash = make(map[string]*LSym)
ctxt.statichash = make(map[string]*LSym)
ctxt.Arch = arch
ctxt.Pathname = objabi.WorkingDir()
if err := ctxt.Headtype.Set(objabi.GOOS); err != nil {
log.Fatalf("unknown goos %s", objabi.GOOS)
}
ctxt.Flag_optimize = true
ctxt.Framepointer_enabled = objabi.Framepointer_enabled(objabi.GOOS, arch.Name)
return ctxt
}
// LookupDerived looks up or creates the symbol with name name derived from symbol s.
// The resulting symbol will be static iff s is.
func (ctxt *Link) LookupDerived(s *LSym, name string) *LSym {
if s.Static() {
return ctxt.LookupStatic(name)
}
return ctxt.Lookup(name)
}
// LookupStatic looks up the static symbol with name name.
// If it does not exist, it creates it.
func (ctxt *Link) LookupStatic(name string) *LSym {
s := ctxt.statichash[name]
if s == nil {
s = &LSym{Name: name, Attribute: AttrStatic}
ctxt.statichash[name] = s
}
return s
}
cmd/compile: separate data and function LSyms Currently, obj.Ctxt's symbol table does not distinguish between ABI0 and ABIInternal symbols. This is *almost* okay, since a given symbol name in the final object file is only going to belong to one ABI or the other, but it requires that the compiler mark a Sym as being a function symbol before it retrieves its LSym. If it retrieves the LSym first, that LSym will be created as ABI0, and later marking the Sym as a function symbol won't change the LSym's ABI. Marking a Sym as a function symbol before looking up its LSym sounds easy, except Syms have a dual purpose: they are used just as interned strings (every function, variable, parameter, etc with the same textual name shares a Sym), and *also* to store state for whatever package global has that name. As a result, it's easy to slip up and look up an LSym when a Sym is serving as the name of a local variable, and then later mark it as a function when it's serving as the global with the name. In general, we were careful to avoid this, but #29610 demonstrates one case where we messed up. Because of on-demand importing from indexed export data, it's possible to compile a method wrapper for a type imported from another package before importing an init function from that package. If the argument of the method is named "init", the "init" LSym will be created as a data symbol when compiling the wrapper, before it gets marked as a function symbol. To fix this, we separate obj.Ctxt's symbol tables for ABI0 and ABIInternal symbols. This way, the compiler will simply get a different LSym once the Sym takes on its package-global meaning as a function. This fixes the above ordering issue, and means we no longer need to go out of our way to create the "init" function early and mark it as a function symbol. Fixes #29610. Updates #27539. Change-Id: Id9458b40017893d46ef9e4a3f9b47fc49e1ce8df Reviewed-on: https://go-review.googlesource.com/c/157017 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2019-01-08 22:23:52 -05:00
// LookupABI looks up a symbol with the given ABI.
// If it does not exist, it creates it.
func (ctxt *Link) LookupABI(name string, abi ABI) *LSym {
return ctxt.LookupABIInit(name, abi, nil)
}
// LookupABI looks up a symbol with the given ABI.
// If it does not exist, it creates it and
// passes it to init for one-time initialization.
func (ctxt *Link) LookupABIInit(name string, abi ABI, init func(s *LSym)) *LSym {
cmd/compile: separate data and function LSyms Currently, obj.Ctxt's symbol table does not distinguish between ABI0 and ABIInternal symbols. This is *almost* okay, since a given symbol name in the final object file is only going to belong to one ABI or the other, but it requires that the compiler mark a Sym as being a function symbol before it retrieves its LSym. If it retrieves the LSym first, that LSym will be created as ABI0, and later marking the Sym as a function symbol won't change the LSym's ABI. Marking a Sym as a function symbol before looking up its LSym sounds easy, except Syms have a dual purpose: they are used just as interned strings (every function, variable, parameter, etc with the same textual name shares a Sym), and *also* to store state for whatever package global has that name. As a result, it's easy to slip up and look up an LSym when a Sym is serving as the name of a local variable, and then later mark it as a function when it's serving as the global with the name. In general, we were careful to avoid this, but #29610 demonstrates one case where we messed up. Because of on-demand importing from indexed export data, it's possible to compile a method wrapper for a type imported from another package before importing an init function from that package. If the argument of the method is named "init", the "init" LSym will be created as a data symbol when compiling the wrapper, before it gets marked as a function symbol. To fix this, we separate obj.Ctxt's symbol tables for ABI0 and ABIInternal symbols. This way, the compiler will simply get a different LSym once the Sym takes on its package-global meaning as a function. This fixes the above ordering issue, and means we no longer need to go out of our way to create the "init" function early and mark it as a function symbol. Fixes #29610. Updates #27539. Change-Id: Id9458b40017893d46ef9e4a3f9b47fc49e1ce8df Reviewed-on: https://go-review.googlesource.com/c/157017 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2019-01-08 22:23:52 -05:00
var hash map[string]*LSym
switch abi {
case ABI0:
hash = ctxt.hash
case ABIInternal:
hash = ctxt.funchash
default:
panic("unknown ABI")
}
ctxt.hashmu.Lock()
s := hash[name]
if s == nil {
s = &LSym{Name: name}
s.SetABI(abi)
hash[name] = s
if init != nil {
init(s)
}
cmd/compile: separate data and function LSyms Currently, obj.Ctxt's symbol table does not distinguish between ABI0 and ABIInternal symbols. This is *almost* okay, since a given symbol name in the final object file is only going to belong to one ABI or the other, but it requires that the compiler mark a Sym as being a function symbol before it retrieves its LSym. If it retrieves the LSym first, that LSym will be created as ABI0, and later marking the Sym as a function symbol won't change the LSym's ABI. Marking a Sym as a function symbol before looking up its LSym sounds easy, except Syms have a dual purpose: they are used just as interned strings (every function, variable, parameter, etc with the same textual name shares a Sym), and *also* to store state for whatever package global has that name. As a result, it's easy to slip up and look up an LSym when a Sym is serving as the name of a local variable, and then later mark it as a function when it's serving as the global with the name. In general, we were careful to avoid this, but #29610 demonstrates one case where we messed up. Because of on-demand importing from indexed export data, it's possible to compile a method wrapper for a type imported from another package before importing an init function from that package. If the argument of the method is named "init", the "init" LSym will be created as a data symbol when compiling the wrapper, before it gets marked as a function symbol. To fix this, we separate obj.Ctxt's symbol tables for ABI0 and ABIInternal symbols. This way, the compiler will simply get a different LSym once the Sym takes on its package-global meaning as a function. This fixes the above ordering issue, and means we no longer need to go out of our way to create the "init" function early and mark it as a function symbol. Fixes #29610. Updates #27539. Change-Id: Id9458b40017893d46ef9e4a3f9b47fc49e1ce8df Reviewed-on: https://go-review.googlesource.com/c/157017 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
2019-01-08 22:23:52 -05:00
}
ctxt.hashmu.Unlock()
return s
}
// Lookup looks up the symbol with name name.
// If it does not exist, it creates it.
func (ctxt *Link) Lookup(name string) *LSym {
return ctxt.LookupInit(name, nil)
}
// LookupInit looks up the symbol with name name.
// If it does not exist, it creates it and
// passes it to init for one-time initialization.
func (ctxt *Link) LookupInit(name string, init func(s *LSym)) *LSym {
cmd/compile: add initial backend concurrency support This CL adds initial support for concurrent backend compilation. BACKGROUND The compiler currently consists (very roughly) of the following phases: 1. Initialization. 2. Lexing and parsing into the cmd/compile/internal/syntax AST. 3. Translation into the cmd/compile/internal/gc AST. 4. Some gc AST passes: typechecking, escape analysis, inlining, closure handling, expression evaluation ordering (order.go), and some lowering and optimization (walk.go). 5. Translation into the cmd/compile/internal/ssa SSA form. 6. Optimization and lowering of SSA form. 7. Translation from SSA form to assembler instructions. 8. Translation from assembler instructions to machine code. 9. Writing lots of output: machine code, DWARF symbols, type and reflection info, export data. Phase 2 was already concurrent as of Go 1.8. Phase 3 is planned for eventual removal; we hope to go straight from syntax AST to SSA. Phases 5–8 are per-function; this CL adds support for processing multiple functions concurrently. The slowest phases in the compiler are 5 and 6, so this offers the opportunity for some good speed-ups. Unfortunately, it's not quite that straightforward. In the current compiler, the latter parts of phase 4 (order, walk) are done function-at-a-time as needed. Making order and walk concurrency-safe proved hard, and they're not particularly slow, so there wasn't much reward. To enable phases 5–8 to be done concurrently, when concurrent backend compilation is requested, we complete phase 4 for all functions before starting later phases for any functions. Also, in reality, we automatically generate new functions in phase 9, such as method wrappers and equality and has routines. Those new functions then go through phases 4–8. This CL disables concurrent backend compilation after the first, big, user-provided batch of functions has been compiled. This is done to keep things simple, and because the autogenerated functions tend to be small, few, simple, and fast to compile. USAGE Concurrent backend compilation still defaults to off. To set the number of functions that may be backend-compiled concurrently, use the compiler flag -c. In future work, cmd/go will automatically set -c. Furthermore, this CL has been intentionally written so that the c=1 path has no backend concurrency whatsoever, not even spawning any goroutines. This helps ensure that, should problems arise late in the development cycle, we can simply have cmd/go set c=1 always, and revert to the original compiler behavior. MUTEXES Most of the work required to make concurrent backend compilation safe has occurred over the past month. This CL adds a handful of mutexes to get the rest of the way there; they are the mutexes that I didn't see a clean way to avoid. Some of them may still be eliminable in future work. In no particular order: * gc.funcsymsmu. The global funcsyms slice is populated lazily when we need function symbols for closures. This occurs during gc AST to SSA translation. The function funcsym also does a package lookup, which is a source of races on types.Pkg.Syms; funcsymsmu also covers that package lookup. This mutex is low priority: it adds a single global, it is in an infrequently used code path, and it is low contention. Since funcsyms may now be added in any order, we must sort them to preserve reproducible builds. * gc.largeStackFramesMu. We don't discover until after SSA compilation that a function's stack frame is gigantic. Recording that error happens basically never, but it does happen concurrently. Fix with a low priority mutex and sorting. * obj.Link.hashmu. ctxt.hash stores the mapping from types.Syms (compiler symbols) to obj.LSyms (linker symbols). It is accessed fairly heavily through all the phases. This is the only heavily contended mutex. * gc.signatlistmu. The global signatlist map is populated with types through several of the concurrent phases, including notably via ngotype during DWARF generation. It is low priority for removal. * gc.typepkgmu. Looking up symbols in the types package happens a fair amount during backend compilation and DWARF generation, particularly via ngotype. This mutex helps us to avoid a broader mutex on types.Pkg.Syms. It has low-to-moderate contention. * types.internedStringsmu. gc AST to SSA conversion and some SSA work introduce new autotmps. Those autotmps have their names interned to reduce allocations. That interning requires protecting types.internedStrings. The autotmp names are heavily re-used, and the mutex overhead and contention here are low, so it is probably a worthwhile performance optimization to keep this mutex. TESTING I have been testing this code locally by running 'go install -race cmd/compile' and then doing 'go build -a -gcflags=-c=128 std cmd' for all architectures and a variety of compiler flags. This obviously needs to be made part of the builders, but it is too expensive to make part of all.bash. I have filed #19962 for this. REPRODUCIBLE BUILDS This version of the compiler generates reproducible builds. Testing reproducible builds also needs automation, however, and is also too expensive for all.bash. This is #19961. Also of note is that some of the compiler flags used by 'toolstash -cmp' are currently incompatible with concurrent backend compilation. They still work fine with c=1. Time will tell whether this is a problem. NEXT STEPS * Continue to find and fix races and bugs, using a combination of code inspection, fuzzing, and hopefully some community experimentation. I do not know of any outstanding races, but there probably are some. * Improve testing. * Improve performance, for many values of c. * Integrate with cmd/go and fine tune. * Support concurrent compilation with the -race flag. It is a sad irony that it does not yet work. * Minor code cleanup that has been deferred during the last month due to uncertainty about the ultimate shape of this CL. PERFORMANCE Here's the buried lede, at last. :) All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop. First, going from tip to this CL with c=1 has almost no impact. name old time/op new time/op delta Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29) Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30) GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28) Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30) SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29) Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30) GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28) Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29) Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29) XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28) [Geo mean] 332ms 333ms +0.10% name old user-time/op new user-time/op delta Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27) Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29) GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29) Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30) SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29) Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27) GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30) Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30) Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29) XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29) [Geo mean] 415ms 415ms +0.02% name old obj-bytes new obj-bytes delta Template 382k ± 0% 382k ± 0% ~ (all equal) Unicode 203k ± 0% 203k ± 0% ~ (all equal) GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal) Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal) SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal) Flate 230k ± 0% 230k ± 0% ~ (all equal) GoParser 287k ± 0% 287k ± 0% ~ (all equal) Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal) Tar 190k ± 0% 190k ± 0% ~ (all equal) XML 416k ± 0% 416k ± 0% ~ (all equal) [Geo mean] 660k 660k +0.00% Comparing this CL to itself, from c=1 to c=2 improves real times 20-30%, costs 5-10% more CPU time, and adds about 2% alloc. The allocation increase comes from allocating more ssa.Caches. name old time/op new time/op delta Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49) Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48) GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49) Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46) SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49) Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48) GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48) Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49) Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47) XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45) name old user-time/op new user-time/op delta Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48) Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50) GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50) Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48) SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47) Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47) GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46) Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48) Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49) XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50) name old alloc/op new alloc/op delta Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5) GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5) Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5) Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5) Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5) XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5) Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5) Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5) SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5) Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5) GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5) Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5) Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5) XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5) From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%: name old time/op new time/op delta Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50) Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47) GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49) Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46) SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49) Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48) GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50) Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49) Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49) XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48) name old user-time/op new user-time/op delta Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50) Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50) GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50) Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49) SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50) Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48) GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50) Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48) Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50) XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50) name old alloc/op new alloc/op delta Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5) GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5) Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5) Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5) Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5) XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5) Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5) Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5) SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5) Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5) GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5) Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5) Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5) XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5) Going beyond c=4 on my machine tends to increase CPU time and allocs without impacting real time. The CPU time numbers matter, because when there are many concurrent compilation processes, that will impact the overall throughput. The numbers above are in many ways the best case scenario; we can take full advantage of all cores. Fortunately, the most common compilation scenario is incremental re-compilation of a single package during a build/test cycle. Updates #15756 Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea Reviewed-on: https://go-review.googlesource.com/40693 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Reviewed-by: Robert Griesemer <gri@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
ctxt.hashmu.Lock()
s := ctxt.hash[name]
if s == nil {
s = &LSym{Name: name}
ctxt.hash[name] = s
if init != nil {
init(s)
}
}
cmd/compile: add initial backend concurrency support This CL adds initial support for concurrent backend compilation. BACKGROUND The compiler currently consists (very roughly) of the following phases: 1. Initialization. 2. Lexing and parsing into the cmd/compile/internal/syntax AST. 3. Translation into the cmd/compile/internal/gc AST. 4. Some gc AST passes: typechecking, escape analysis, inlining, closure handling, expression evaluation ordering (order.go), and some lowering and optimization (walk.go). 5. Translation into the cmd/compile/internal/ssa SSA form. 6. Optimization and lowering of SSA form. 7. Translation from SSA form to assembler instructions. 8. Translation from assembler instructions to machine code. 9. Writing lots of output: machine code, DWARF symbols, type and reflection info, export data. Phase 2 was already concurrent as of Go 1.8. Phase 3 is planned for eventual removal; we hope to go straight from syntax AST to SSA. Phases 5–8 are per-function; this CL adds support for processing multiple functions concurrently. The slowest phases in the compiler are 5 and 6, so this offers the opportunity for some good speed-ups. Unfortunately, it's not quite that straightforward. In the current compiler, the latter parts of phase 4 (order, walk) are done function-at-a-time as needed. Making order and walk concurrency-safe proved hard, and they're not particularly slow, so there wasn't much reward. To enable phases 5–8 to be done concurrently, when concurrent backend compilation is requested, we complete phase 4 for all functions before starting later phases for any functions. Also, in reality, we automatically generate new functions in phase 9, such as method wrappers and equality and has routines. Those new functions then go through phases 4–8. This CL disables concurrent backend compilation after the first, big, user-provided batch of functions has been compiled. This is done to keep things simple, and because the autogenerated functions tend to be small, few, simple, and fast to compile. USAGE Concurrent backend compilation still defaults to off. To set the number of functions that may be backend-compiled concurrently, use the compiler flag -c. In future work, cmd/go will automatically set -c. Furthermore, this CL has been intentionally written so that the c=1 path has no backend concurrency whatsoever, not even spawning any goroutines. This helps ensure that, should problems arise late in the development cycle, we can simply have cmd/go set c=1 always, and revert to the original compiler behavior. MUTEXES Most of the work required to make concurrent backend compilation safe has occurred over the past month. This CL adds a handful of mutexes to get the rest of the way there; they are the mutexes that I didn't see a clean way to avoid. Some of them may still be eliminable in future work. In no particular order: * gc.funcsymsmu. The global funcsyms slice is populated lazily when we need function symbols for closures. This occurs during gc AST to SSA translation. The function funcsym also does a package lookup, which is a source of races on types.Pkg.Syms; funcsymsmu also covers that package lookup. This mutex is low priority: it adds a single global, it is in an infrequently used code path, and it is low contention. Since funcsyms may now be added in any order, we must sort them to preserve reproducible builds. * gc.largeStackFramesMu. We don't discover until after SSA compilation that a function's stack frame is gigantic. Recording that error happens basically never, but it does happen concurrently. Fix with a low priority mutex and sorting. * obj.Link.hashmu. ctxt.hash stores the mapping from types.Syms (compiler symbols) to obj.LSyms (linker symbols). It is accessed fairly heavily through all the phases. This is the only heavily contended mutex. * gc.signatlistmu. The global signatlist map is populated with types through several of the concurrent phases, including notably via ngotype during DWARF generation. It is low priority for removal. * gc.typepkgmu. Looking up symbols in the types package happens a fair amount during backend compilation and DWARF generation, particularly via ngotype. This mutex helps us to avoid a broader mutex on types.Pkg.Syms. It has low-to-moderate contention. * types.internedStringsmu. gc AST to SSA conversion and some SSA work introduce new autotmps. Those autotmps have their names interned to reduce allocations. That interning requires protecting types.internedStrings. The autotmp names are heavily re-used, and the mutex overhead and contention here are low, so it is probably a worthwhile performance optimization to keep this mutex. TESTING I have been testing this code locally by running 'go install -race cmd/compile' and then doing 'go build -a -gcflags=-c=128 std cmd' for all architectures and a variety of compiler flags. This obviously needs to be made part of the builders, but it is too expensive to make part of all.bash. I have filed #19962 for this. REPRODUCIBLE BUILDS This version of the compiler generates reproducible builds. Testing reproducible builds also needs automation, however, and is also too expensive for all.bash. This is #19961. Also of note is that some of the compiler flags used by 'toolstash -cmp' are currently incompatible with concurrent backend compilation. They still work fine with c=1. Time will tell whether this is a problem. NEXT STEPS * Continue to find and fix races and bugs, using a combination of code inspection, fuzzing, and hopefully some community experimentation. I do not know of any outstanding races, but there probably are some. * Improve testing. * Improve performance, for many values of c. * Integrate with cmd/go and fine tune. * Support concurrent compilation with the -race flag. It is a sad irony that it does not yet work. * Minor code cleanup that has been deferred during the last month due to uncertainty about the ultimate shape of this CL. PERFORMANCE Here's the buried lede, at last. :) All benchmarks are from my 8 core 2.9 GHz Intel Core i7 darwin/amd64 laptop. First, going from tip to this CL with c=1 has almost no impact. name old time/op new time/op delta Template 195ms ± 3% 194ms ± 5% ~ (p=0.370 n=30+29) Unicode 86.6ms ± 3% 87.0ms ± 7% ~ (p=0.958 n=29+30) GoTypes 548ms ± 3% 555ms ± 4% +1.35% (p=0.001 n=30+28) Compiler 2.51s ± 2% 2.54s ± 2% +1.17% (p=0.000 n=28+30) SSA 5.16s ± 3% 5.16s ± 2% ~ (p=0.910 n=30+29) Flate 124ms ± 5% 124ms ± 4% ~ (p=0.947 n=30+30) GoParser 146ms ± 3% 146ms ± 3% ~ (p=0.150 n=29+28) Reflect 354ms ± 3% 352ms ± 4% ~ (p=0.096 n=29+29) Tar 107ms ± 5% 106ms ± 3% ~ (p=0.370 n=30+29) XML 200ms ± 4% 201ms ± 4% ~ (p=0.313 n=29+28) [Geo mean] 332ms 333ms +0.10% name old user-time/op new user-time/op delta Template 227ms ± 5% 225ms ± 5% ~ (p=0.457 n=28+27) Unicode 109ms ± 4% 109ms ± 5% ~ (p=0.758 n=29+29) GoTypes 713ms ± 4% 721ms ± 5% ~ (p=0.051 n=30+29) Compiler 3.36s ± 2% 3.38s ± 3% ~ (p=0.146 n=30+30) SSA 7.46s ± 3% 7.47s ± 3% ~ (p=0.804 n=30+29) Flate 146ms ± 7% 147ms ± 3% ~ (p=0.833 n=29+27) GoParser 179ms ± 5% 179ms ± 5% ~ (p=0.866 n=30+30) Reflect 431ms ± 4% 429ms ± 4% ~ (p=0.593 n=29+30) Tar 124ms ± 5% 123ms ± 5% ~ (p=0.140 n=29+29) XML 243ms ± 4% 242ms ± 7% ~ (p=0.404 n=29+29) [Geo mean] 415ms 415ms +0.02% name old obj-bytes new obj-bytes delta Template 382k ± 0% 382k ± 0% ~ (all equal) Unicode 203k ± 0% 203k ± 0% ~ (all equal) GoTypes 1.18M ± 0% 1.18M ± 0% ~ (all equal) Compiler 3.98M ± 0% 3.98M ± 0% ~ (all equal) SSA 8.28M ± 0% 8.28M ± 0% ~ (all equal) Flate 230k ± 0% 230k ± 0% ~ (all equal) GoParser 287k ± 0% 287k ± 0% ~ (all equal) Reflect 1.00M ± 0% 1.00M ± 0% ~ (all equal) Tar 190k ± 0% 190k ± 0% ~ (all equal) XML 416k ± 0% 416k ± 0% ~ (all equal) [Geo mean] 660k 660k +0.00% Comparing this CL to itself, from c=1 to c=2 improves real times 20-30%, costs 5-10% more CPU time, and adds about 2% alloc. The allocation increase comes from allocating more ssa.Caches. name old time/op new time/op delta Template 202ms ± 3% 149ms ± 3% -26.15% (p=0.000 n=49+49) Unicode 87.4ms ± 4% 84.2ms ± 3% -3.68% (p=0.000 n=48+48) GoTypes 560ms ± 2% 398ms ± 2% -28.96% (p=0.000 n=49+49) Compiler 2.46s ± 3% 1.76s ± 2% -28.61% (p=0.000 n=48+46) SSA 6.17s ± 2% 4.04s ± 1% -34.52% (p=0.000 n=49+49) Flate 126ms ± 3% 92ms ± 2% -26.81% (p=0.000 n=49+48) GoParser 148ms ± 4% 107ms ± 2% -27.78% (p=0.000 n=49+48) Reflect 361ms ± 3% 281ms ± 3% -22.10% (p=0.000 n=49+49) Tar 109ms ± 4% 86ms ± 3% -20.81% (p=0.000 n=49+47) XML 204ms ± 3% 144ms ± 2% -29.53% (p=0.000 n=48+45) name old user-time/op new user-time/op delta Template 246ms ± 9% 246ms ± 4% ~ (p=0.401 n=50+48) Unicode 109ms ± 4% 111ms ± 4% +1.47% (p=0.000 n=44+50) GoTypes 728ms ± 3% 765ms ± 3% +5.04% (p=0.000 n=46+50) Compiler 3.33s ± 3% 3.41s ± 2% +2.31% (p=0.000 n=49+48) SSA 8.52s ± 2% 9.11s ± 2% +6.93% (p=0.000 n=49+47) Flate 149ms ± 4% 161ms ± 3% +8.13% (p=0.000 n=50+47) GoParser 181ms ± 5% 192ms ± 2% +6.40% (p=0.000 n=49+46) Reflect 452ms ± 9% 474ms ± 2% +4.99% (p=0.000 n=50+48) Tar 126ms ± 6% 136ms ± 4% +7.95% (p=0.000 n=50+49) XML 247ms ± 5% 264ms ± 3% +6.94% (p=0.000 n=48+50) name old alloc/op new alloc/op delta Template 38.8MB ± 0% 39.3MB ± 0% +1.48% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 30.2MB ± 0% +1.19% (p=0.008 n=5+5) GoTypes 113MB ± 0% 114MB ± 0% +0.69% (p=0.008 n=5+5) Compiler 443MB ± 0% 447MB ± 0% +0.95% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.26GB ± 0% +0.89% (p=0.008 n=5+5) Flate 25.3MB ± 0% 25.9MB ± 1% +2.35% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 32.2MB ± 0% +1.59% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 78.9MB ± 0% +0.91% (p=0.008 n=5+5) Tar 26.6MB ± 0% 27.0MB ± 0% +1.80% (p=0.008 n=5+5) XML 42.4MB ± 0% 43.4MB ± 0% +2.35% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 379k ± 0% 378k ± 0% ~ (p=0.421 n=5+5) Unicode 322k ± 0% 321k ± 0% ~ (p=0.222 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.548 n=5+5) Compiler 4.12M ± 0% 4.11M ± 0% -0.14% (p=0.032 n=5+5) SSA 9.72M ± 0% 9.72M ± 0% ~ (p=0.421 n=5+5) Flate 234k ± 1% 234k ± 0% ~ (p=0.421 n=5+5) GoParser 316k ± 1% 315k ± 0% ~ (p=0.222 n=5+5) Reflect 980k ± 0% 979k ± 0% ~ (p=0.095 n=5+5) Tar 249k ± 1% 249k ± 1% ~ (p=0.841 n=5+5) XML 392k ± 0% 391k ± 0% ~ (p=0.095 n=5+5) From c=1 to c=4, real time is down ~40%, CPU usage up 10-20%, alloc up ~5%: name old time/op new time/op delta Template 203ms ± 3% 131ms ± 5% -35.45% (p=0.000 n=50+50) Unicode 87.2ms ± 4% 84.1ms ± 2% -3.61% (p=0.000 n=48+47) GoTypes 560ms ± 4% 310ms ± 2% -44.65% (p=0.000 n=50+49) Compiler 2.47s ± 3% 1.41s ± 2% -43.10% (p=0.000 n=50+46) SSA 6.17s ± 2% 3.20s ± 2% -48.06% (p=0.000 n=49+49) Flate 126ms ± 4% 74ms ± 2% -41.06% (p=0.000 n=49+48) GoParser 148ms ± 4% 89ms ± 3% -39.97% (p=0.000 n=49+50) Reflect 360ms ± 3% 242ms ± 3% -32.81% (p=0.000 n=49+49) Tar 108ms ± 4% 73ms ± 4% -32.48% (p=0.000 n=50+49) XML 203ms ± 3% 119ms ± 3% -41.56% (p=0.000 n=49+48) name old user-time/op new user-time/op delta Template 246ms ± 9% 287ms ± 9% +16.98% (p=0.000 n=50+50) Unicode 109ms ± 4% 118ms ± 5% +7.56% (p=0.000 n=46+50) GoTypes 735ms ± 4% 806ms ± 2% +9.62% (p=0.000 n=50+50) Compiler 3.34s ± 4% 3.56s ± 2% +6.78% (p=0.000 n=49+49) SSA 8.54s ± 3% 10.04s ± 3% +17.55% (p=0.000 n=50+50) Flate 149ms ± 6% 176ms ± 3% +17.82% (p=0.000 n=50+48) GoParser 181ms ± 5% 213ms ± 3% +17.47% (p=0.000 n=50+50) Reflect 453ms ± 6% 499ms ± 2% +10.11% (p=0.000 n=50+48) Tar 126ms ± 5% 149ms ±11% +18.76% (p=0.000 n=50+50) XML 246ms ± 5% 287ms ± 4% +16.53% (p=0.000 n=49+50) name old alloc/op new alloc/op delta Template 38.8MB ± 0% 40.4MB ± 0% +4.21% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 30.9MB ± 0% +3.68% (p=0.008 n=5+5) GoTypes 113MB ± 0% 116MB ± 0% +2.71% (p=0.008 n=5+5) Compiler 443MB ± 0% 455MB ± 0% +2.75% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.27GB ± 0% +1.84% (p=0.008 n=5+5) Flate 25.3MB ± 0% 26.9MB ± 1% +6.31% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 33.2MB ± 0% +4.61% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 80.2MB ± 0% +2.53% (p=0.008 n=5+5) Tar 26.6MB ± 0% 27.9MB ± 0% +5.19% (p=0.008 n=5+5) XML 42.4MB ± 0% 44.6MB ± 0% +5.20% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 380k ± 0% 379k ± 0% -0.39% (p=0.032 n=5+5) Unicode 321k ± 0% 321k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.421 n=5+5) Compiler 4.12M ± 0% 4.14M ± 0% +0.52% (p=0.008 n=5+5) SSA 9.72M ± 0% 9.76M ± 0% +0.37% (p=0.008 n=5+5) Flate 234k ± 1% 234k ± 1% ~ (p=0.690 n=5+5) GoParser 316k ± 0% 317k ± 1% ~ (p=0.841 n=5+5) Reflect 981k ± 0% 981k ± 0% ~ (p=1.000 n=5+5) Tar 250k ± 0% 249k ± 1% ~ (p=0.151 n=5+5) XML 393k ± 0% 392k ± 0% ~ (p=0.056 n=5+5) Going beyond c=4 on my machine tends to increase CPU time and allocs without impacting real time. The CPU time numbers matter, because when there are many concurrent compilation processes, that will impact the overall throughput. The numbers above are in many ways the best case scenario; we can take full advantage of all cores. Fortunately, the most common compilation scenario is incremental re-compilation of a single package during a build/test cycle. Updates #15756 Change-Id: I6725558ca2069edec0ac5b0d1683105a9fff6bea Reviewed-on: https://go-review.googlesource.com/40693 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Reviewed-by: Robert Griesemer <gri@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-19 08:27:26 -07:00
ctxt.hashmu.Unlock()
return s
}
func (ctxt *Link) Float32Sym(f float32) *LSym {
i := math.Float32bits(f)
name := fmt.Sprintf("$f32.%08x", i)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 4
s.Set(AttrLocal, true)
})
}
func (ctxt *Link) Float64Sym(f float64) *LSym {
i := math.Float64bits(f)
name := fmt.Sprintf("$f64.%016x", i)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 8
s.Set(AttrLocal, true)
})
}
func (ctxt *Link) Int64Sym(i int64) *LSym {
name := fmt.Sprintf("$i64.%016x", uint64(i))
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 8
s.Set(AttrLocal, true)
})
}
// Assign index to symbols.
// asm is set to true if this is called by the assembler (i.e. not the compiler),
// in which case all the symbols are non-package (for now).
func (ctxt *Link) NumberSyms(asm bool) {
if !ctxt.Flag_newobj {
return
}
if ctxt.Headtype == objabi.Haix {
// Data must be sorted to keep a constant order in TOC symbols.
// As they are created during Progedit, two symbols can be switched between
// two different compilations. Therefore, BuildID will be different.
// TODO: find a better place and optimize to only sort TOC symbols
sort.Slice(ctxt.Data, func(i, j int) bool {
return ctxt.Data[i].Name < ctxt.Data[j].Name
})
}
ctxt.pkgIdx = make(map[string]int32)
ctxt.defs = []*LSym{}
ctxt.nonpkgdefs = []*LSym{}
var idx, nonpkgidx int32 = 0, 0
ctxt.traverseSyms(traverseDefs, func(s *LSym) {
if isNonPkgSym(ctxt, asm, s) {
s.PkgIdx = goobj2.PkgIdxNone
s.SymIdx = nonpkgidx
if nonpkgidx != int32(len(ctxt.nonpkgdefs)) {
panic("bad index")
}
ctxt.nonpkgdefs = append(ctxt.nonpkgdefs, s)
nonpkgidx++
} else {
s.PkgIdx = goobj2.PkgIdxSelf
s.SymIdx = idx
if idx != int32(len(ctxt.defs)) {
panic("bad index")
}
ctxt.defs = append(ctxt.defs, s)
idx++
}
s.Set(AttrIndexed, true)
})
ipkg := int32(1) // 0 is invalid index
nonpkgdef := nonpkgidx
ctxt.traverseSyms(traverseRefs|traverseAux, func(rs *LSym) {
if rs.PkgIdx != goobj2.PkgIdxInvalid {
return
}
pkg := rs.Pkg
if pkg == "" || pkg == "\"\"" || pkg == "_" || !rs.Indexed() {
rs.PkgIdx = goobj2.PkgIdxNone
rs.SymIdx = nonpkgidx
rs.Set(AttrIndexed, true)
if nonpkgidx != nonpkgdef+int32(len(ctxt.nonpkgrefs)) {
panic("bad index")
}
ctxt.nonpkgrefs = append(ctxt.nonpkgrefs, rs)
nonpkgidx++
return
}
if k, ok := ctxt.pkgIdx[pkg]; ok {
rs.PkgIdx = k
return
}
rs.PkgIdx = ipkg
ctxt.pkgIdx[pkg] = ipkg
ipkg++
})
}
// Returns whether s is a non-package symbol, which needs to be referenced
// by name instead of by index.
func isNonPkgSym(ctxt *Link, asm bool, s *LSym) bool {
if asm && !s.Static() {
// asm symbols are referenced by name only, except static symbols
// which are file-local and can be referenced by index.
return true
}
if ctxt.Flag_linkshared {
// The referenced symbol may be in a different shared library so
// the linker cannot see its index.
return true
}
if s.Pkg == "_" {
// The frontend uses package "_" to mark symbols that should not
// be referenced by index, e.g. linkname'd symbols.
return true
}
if s.DuplicateOK() {
// Dupok symbol needs to be dedup'd by name.
return true
}
return false
}
type traverseFlag uint32
const (
traverseDefs traverseFlag = 1 << iota
traverseRefs
traverseAux
traverseAll = traverseDefs | traverseRefs | traverseAux
)
// Traverse symbols based on flag, call fn for each symbol.
func (ctxt *Link) traverseSyms(flag traverseFlag, fn func(*LSym)) {
lists := [][]*LSym{ctxt.Text, ctxt.Data, ctxt.ABIAliases}
for _, list := range lists {
for _, s := range list {
if flag&traverseDefs != 0 {
fn(s)
}
if flag&traverseRefs != 0 {
for _, r := range s.R {
if r.Sym != nil {
fn(r.Sym)
}
}
}
if flag&traverseAux != 0 {
if s.Gotype != nil {
fn(s.Gotype)
}
if s.Type == objabi.STEXT {
pc := &s.Func.Pcln
for _, d := range pc.Funcdata {
if d != nil {
fn(d)
}
}
for _, f := range pc.File {
if fsym := ctxt.Lookup(f); fsym != nil {
fn(fsym)
}
}
for _, call := range pc.InlTree.nodes {
if call.Func != nil {
fn(call.Func)
}
f, _ := linkgetlineFromPos(ctxt, call.Pos)
if fsym := ctxt.Lookup(f); fsym != nil {
fn(fsym)
}
}
}
}
}
}
}