go/src/cmd/internal/obj/plist.go

315 lines
8.5 KiB
Go
Raw Normal View History

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package obj
import (
"cmd/internal/objabi"
"fmt"
"strings"
)
type Plist struct {
Firstpc *Prog
Curfn interface{} // holds a *gc.Node, if non-nil
}
cmd/compile: teach assemblers to accept a Prog allocator The existing bulk Prog allocator is not concurrency-safe. To allow for concurrency-safe bulk allocation of Progs, I want to move Prog allocation and caching upstream, to the clients of cmd/internal/obj. This is a preliminary enabling refactoring. After this CL, instead of calling Ctxt.NewProg throughout the assemblers, we thread through a newprog function that returns a new Prog. That function is set up to be Ctxt.NewProg, so there are no real changes in this CL; this CL only establishes the plumbing. Passes toolstash-check -all. Negligible compiler performance impact. Updates #15756 name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.574 n=49+47) Unicode 90.1ms ± 5% 89.9ms ± 4% ~ (p=0.417 n=50+49) GoTypes 585ms ± 4% 584ms ± 3% ~ (p=0.466 n=49+49) SSA 6.50s ± 3% 6.52s ± 2% ~ (p=0.251 n=49+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.673 n=49+50) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.810 n=48+49) Reflect 372ms ± 4% 372ms ± 5% ~ (p=0.778 n=49+50) Tar 113ms ± 5% 111ms ± 4% -0.98% (p=0.016 n=50+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.483 n=47+49) [Geo mean] 285ms 285ms -0.17% name old user-ns/op new user-ns/op delta Template 253M ± 8% 254M ± 9% ~ (p=0.899 n=50+50) Unicode 106M ± 9% 106M ±11% ~ (p=0.642 n=50+50) GoTypes 736M ± 4% 740M ± 4% ~ (p=0.121 n=50+49) SSA 8.82G ± 3% 8.88G ± 2% +0.65% (p=0.006 n=49+48) Flate 147M ± 4% 147M ± 5% ~ (p=0.844 n=47+48) GoParser 179M ± 4% 178M ± 6% ~ (p=0.785 n=50+50) Reflect 443M ± 6% 441M ± 5% ~ (p=0.850 n=48+47) Tar 126M ± 5% 126M ± 5% ~ (p=0.734 n=50+50) XML 244M ± 5% 244M ± 5% ~ (p=0.594 n=49+50) [Geo mean] 341M 341M +0.11% Change-Id: Ice962f61eb3a524c2db00a166cb582c22caa7d68 Reviewed-on: https://go-review.googlesource.com/39633 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-04 14:31:55 -07:00
// ProgAlloc is a function that allocates Progs.
// It is used to provide access to cached/bulk-allocated Progs to the assemblers.
type ProgAlloc func() *Prog
func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string) {
// Build list of symbols, and assign instructions to lists.
var curtext *LSym
var etext *Prog
var text []*LSym
var plink *Prog
for p := plist.Firstpc; p != nil; p = plink {
if ctxt.Debugasm > 0 && ctxt.Debugvlog {
fmt.Printf("obj: %v\n", p)
}
plink = p.Link
p.Link = nil
switch p.As {
case AEND:
continue
case ATEXT:
s := p.From.Sym
if s == nil {
// func _() { }
curtext = nil
continue
}
text = append(text, s)
etext = p
curtext = s
continue
case AFUNCDATA:
// Rewrite reference to go_args_stackmap(SB) to the Go-provided declaration information.
if curtext == nil { // func _() {}
continue
}
if p.To.Sym.Name == "go_args_stackmap" {
if p.From.Type != TYPE_CONST || p.From.Offset != objabi.FUNCDATA_ArgsPointerMaps {
ctxt.Diag("FUNCDATA use of go_args_stackmap(SB) without FUNCDATA_ArgsPointerMaps")
}
p.To.Sym = ctxt.LookupDerived(curtext, curtext.Name+".args_stackmap")
}
}
if curtext == nil {
etext = nil
continue
}
etext.Link = p
etext = p
}
cmd/compile: add Prog cache to Progs The existing bulk/cached Prog allocator, Ctxt.NewProg, is not concurrency-safe. This CL moves Prog allocation to its clients, the compiler and the assembler. The assembler is so fast and generates so few Progs that it does not need optimization of Prog allocation. I could not generate measureable changes. And even if I could, the assembly is a miniscule portion of build times. The compiler already has a natural place to manage Prog allocation; this CL migrates the Prog cache there. It will be made concurrency-safe in a later CL by partitioning the Prog cache into chunks and assigning each chunk to a different goroutine to manage. This CL does cause a performance degradation when the compiler is invoked with the -S flag (to dump assembly). However, such usage is rare and almost always done manually. The one instance I know of in a test is TestAssembly in cmd/compile/internal/gc, and I did not detect a measurable performance impact there. Passes toolstash-check -all. Minor compiler performance impact. Updates #15756 Performance impact from just this CL: name old time/op new time/op delta Template 213ms ± 4% 213ms ± 4% ~ (p=0.571 n=49+49) Unicode 89.1ms ± 3% 89.4ms ± 3% ~ (p=0.388 n=47+48) GoTypes 581ms ± 2% 584ms ± 3% +0.56% (p=0.019 n=47+48) SSA 6.48s ± 2% 6.53s ± 2% +0.84% (p=0.000 n=47+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.832 n=49+49) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.815 n=48+47) Reflect 371ms ± 4% 371ms ± 3% ~ (p=0.617 n=50+47) Tar 112ms ± 4% 112ms ± 3% ~ (p=0.724 n=49+49) XML 208ms ± 3% 208ms ± 4% ~ (p=0.678 n=49+50) [Geo mean] 284ms 285ms +0.18% name old user-ns/op new user-ns/op delta Template 251M ± 7% 252M ±11% ~ (p=0.704 n=49+50) Unicode 107M ± 7% 108M ± 5% +1.25% (p=0.036 n=50+49) GoTypes 738M ± 3% 740M ± 3% ~ (p=0.305 n=49+48) SSA 8.83G ± 2% 8.86G ± 4% ~ (p=0.098 n=47+50) Flate 146M ± 6% 147M ± 3% ~ (p=0.584 n=48+41) GoParser 178M ± 6% 179M ± 5% +0.93% (p=0.036 n=49+48) Reflect 441M ± 4% 446M ± 7% ~ (p=0.218 n=44+49) Tar 126M ± 5% 126M ± 5% ~ (p=0.766 n=48+49) XML 245M ± 5% 244M ± 4% ~ (p=0.359 n=50+50) [Geo mean] 341M 342M +0.51% Performance impact from this CL combined with its parent: name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.685 n=47+50) Unicode 89.8ms ± 6% 90.5ms ± 6% ~ (p=0.055 n=50+50) GoTypes 584ms ± 3% 585ms ± 2% ~ (p=0.710 n=49+47) SSA 6.50s ± 2% 6.53s ± 2% +0.39% (p=0.011 n=46+50) Flate 128ms ± 3% 128ms ± 4% ~ (p=0.855 n=47+49) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.666 n=49+49) Reflect 371ms ± 3% 372ms ± 3% ~ (p=0.298 n=48+48) Tar 112ms ± 5% 113ms ± 3% ~ (p=0.107 n=49+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.881 n=50+49) [Geo mean] 285ms 285ms +0.26% name old user-ns/op new user-ns/op delta Template 254M ± 9% 252M ± 8% ~ (p=0.290 n=49+50) Unicode 106M ± 6% 108M ± 7% +1.44% (p=0.034 n=50+50) GoTypes 741M ± 4% 743M ± 4% ~ (p=0.992 n=50+49) SSA 8.86G ± 2% 8.83G ± 3% ~ (p=0.158 n=47+49) Flate 147M ± 4% 148M ± 5% ~ (p=0.832 n=50+49) GoParser 179M ± 5% 178M ± 5% ~ (p=0.370 n=48+50) Reflect 441M ± 6% 445M ± 7% ~ (p=0.246 n=45+47) Tar 126M ± 6% 126M ± 6% ~ (p=0.815 n=49+50) XML 244M ± 3% 245M ± 4% ~ (p=0.190 n=50+50) [Geo mean] 342M 342M +0.17% Change-Id: I020f1c079d495fbe2e15ccb51e1ea2cc1b5a1855 Reviewed-on: https://go-review.googlesource.com/39634 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-05 07:05:35 -07:00
if newprog == nil {
newprog = ctxt.NewProg
}
cmd/compile: teach assemblers to accept a Prog allocator The existing bulk Prog allocator is not concurrency-safe. To allow for concurrency-safe bulk allocation of Progs, I want to move Prog allocation and caching upstream, to the clients of cmd/internal/obj. This is a preliminary enabling refactoring. After this CL, instead of calling Ctxt.NewProg throughout the assemblers, we thread through a newprog function that returns a new Prog. That function is set up to be Ctxt.NewProg, so there are no real changes in this CL; this CL only establishes the plumbing. Passes toolstash-check -all. Negligible compiler performance impact. Updates #15756 name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.574 n=49+47) Unicode 90.1ms ± 5% 89.9ms ± 4% ~ (p=0.417 n=50+49) GoTypes 585ms ± 4% 584ms ± 3% ~ (p=0.466 n=49+49) SSA 6.50s ± 3% 6.52s ± 2% ~ (p=0.251 n=49+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.673 n=49+50) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.810 n=48+49) Reflect 372ms ± 4% 372ms ± 5% ~ (p=0.778 n=49+50) Tar 113ms ± 5% 111ms ± 4% -0.98% (p=0.016 n=50+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.483 n=47+49) [Geo mean] 285ms 285ms -0.17% name old user-ns/op new user-ns/op delta Template 253M ± 8% 254M ± 9% ~ (p=0.899 n=50+50) Unicode 106M ± 9% 106M ±11% ~ (p=0.642 n=50+50) GoTypes 736M ± 4% 740M ± 4% ~ (p=0.121 n=50+49) SSA 8.82G ± 3% 8.88G ± 2% +0.65% (p=0.006 n=49+48) Flate 147M ± 4% 147M ± 5% ~ (p=0.844 n=47+48) GoParser 179M ± 4% 178M ± 6% ~ (p=0.785 n=50+50) Reflect 443M ± 6% 441M ± 5% ~ (p=0.850 n=48+47) Tar 126M ± 5% 126M ± 5% ~ (p=0.734 n=50+50) XML 244M ± 5% 244M ± 5% ~ (p=0.594 n=49+50) [Geo mean] 341M 341M +0.11% Change-Id: Ice962f61eb3a524c2db00a166cb582c22caa7d68 Reviewed-on: https://go-review.googlesource.com/39633 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-04 14:31:55 -07:00
// Add reference to Go arguments for C or assembly functions without them.
for _, s := range text {
if !strings.HasPrefix(s.Name, "\"\".") {
continue
}
found := false
for p := s.Func().Text; p != nil; p = p.Link {
if p.As == AFUNCDATA && p.From.Type == TYPE_CONST && p.From.Offset == objabi.FUNCDATA_ArgsPointerMaps {
found = true
break
}
}
if !found {
p := Appendp(s.Func().Text, newprog)
p.As = AFUNCDATA
p.From.Type = TYPE_CONST
p.From.Offset = objabi.FUNCDATA_ArgsPointerMaps
p.To.Type = TYPE_MEM
p.To.Name = NAME_EXTERN
p.To.Sym = ctxt.LookupDerived(s, s.Name+".args_stackmap")
}
}
// Turn functions into machine code images.
for _, s := range text {
mkfwd(s)
cmd/compile: teach assemblers to accept a Prog allocator The existing bulk Prog allocator is not concurrency-safe. To allow for concurrency-safe bulk allocation of Progs, I want to move Prog allocation and caching upstream, to the clients of cmd/internal/obj. This is a preliminary enabling refactoring. After this CL, instead of calling Ctxt.NewProg throughout the assemblers, we thread through a newprog function that returns a new Prog. That function is set up to be Ctxt.NewProg, so there are no real changes in this CL; this CL only establishes the plumbing. Passes toolstash-check -all. Negligible compiler performance impact. Updates #15756 name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.574 n=49+47) Unicode 90.1ms ± 5% 89.9ms ± 4% ~ (p=0.417 n=50+49) GoTypes 585ms ± 4% 584ms ± 3% ~ (p=0.466 n=49+49) SSA 6.50s ± 3% 6.52s ± 2% ~ (p=0.251 n=49+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.673 n=49+50) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.810 n=48+49) Reflect 372ms ± 4% 372ms ± 5% ~ (p=0.778 n=49+50) Tar 113ms ± 5% 111ms ± 4% -0.98% (p=0.016 n=50+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.483 n=47+49) [Geo mean] 285ms 285ms -0.17% name old user-ns/op new user-ns/op delta Template 253M ± 8% 254M ± 9% ~ (p=0.899 n=50+50) Unicode 106M ± 9% 106M ±11% ~ (p=0.642 n=50+50) GoTypes 736M ± 4% 740M ± 4% ~ (p=0.121 n=50+49) SSA 8.82G ± 3% 8.88G ± 2% +0.65% (p=0.006 n=49+48) Flate 147M ± 4% 147M ± 5% ~ (p=0.844 n=47+48) GoParser 179M ± 4% 178M ± 6% ~ (p=0.785 n=50+50) Reflect 443M ± 6% 441M ± 5% ~ (p=0.850 n=48+47) Tar 126M ± 5% 126M ± 5% ~ (p=0.734 n=50+50) XML 244M ± 5% 244M ± 5% ~ (p=0.594 n=49+50) [Geo mean] 341M 341M +0.11% Change-Id: Ice962f61eb3a524c2db00a166cb582c22caa7d68 Reviewed-on: https://go-review.googlesource.com/39633 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-04 14:31:55 -07:00
linkpatch(ctxt, s, newprog)
ctxt.Arch.Preprocess(ctxt, s, newprog)
ctxt.Arch.Assemble(ctxt, s, newprog)
if ctxt.Errors > 0 {
continue
}
linkpcln(ctxt, s)
if myimportpath != "" {
ctxt.populateDWARF(plist.Curfn, s, myimportpath)
}
}
}
cmd/compile: move Text.From.Sym initialization earlier The initialization of an ATEXT Prog's From.Sym can race with the assemblers in a concurrent compiler. CL 40254 contains an initial, failed attempt to fix that race. This CL takes a different approach: Rather than expose an API to initialize the Prog, expose an API to initialize the Sym. The initialization of the Sym can then be moved earlier in the compiler, avoiding the race. The growth of gc.Func has negligible performance impact; see below. Passes toolstash -cmp. Updates #15756 name old alloc/op new alloc/op delta Template 38.8MB ± 0% 38.8MB ± 0% ~ (p=0.968 n=9+10) Unicode 29.8MB ± 0% 29.8MB ± 0% ~ (p=0.684 n=10+10) GoTypes 113MB ± 0% 113MB ± 0% ~ (p=0.912 n=10+10) SSA 1.25GB ± 0% 1.25GB ± 0% ~ (p=0.481 n=10+10) Flate 25.3MB ± 0% 25.3MB ± 0% ~ (p=0.105 n=10+10) GoParser 31.7MB ± 0% 31.8MB ± 0% +0.09% (p=0.016 n=8+10) Reflect 78.3MB ± 0% 78.2MB ± 0% ~ (p=0.190 n=10+10) Tar 26.5MB ± 0% 26.6MB ± 0% +0.13% (p=0.011 n=10+10) XML 42.4MB ± 0% 42.4MB ± 0% ~ (p=0.971 n=10+10) name old allocs/op new allocs/op delta Template 378k ± 1% 378k ± 0% ~ (p=0.315 n=10+9) Unicode 321k ± 1% 321k ± 0% ~ (p=0.436 n=10+10) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.079 n=10+9) SSA 9.70M ± 0% 9.70M ± 0% -0.04% (p=0.035 n=10+10) Flate 233k ± 1% 234k ± 1% ~ (p=0.529 n=10+10) GoParser 315k ± 0% 316k ± 0% ~ (p=0.095 n=9+10) Reflect 980k ± 0% 980k ± 0% ~ (p=0.436 n=10+10) Tar 249k ± 1% 250k ± 0% ~ (p=0.280 n=10+10) XML 391k ± 1% 391k ± 1% ~ (p=0.481 n=10+10) Change-Id: I3c93033dddd2e1df8cc54a106a6e615d27859e71 Reviewed-on: https://go-review.googlesource.com/40496 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2017-04-12 13:23:07 -07:00
func (ctxt *Link) InitTextSym(s *LSym, flag int) {
if s == nil {
// func _() { }
return
}
if s.Func() != nil {
ctxt.Diag("InitTextSym double init for %s", s.Name)
}
s.NewFuncInfo()
if s.OnList() {
ctxt.Diag("symbol %s listed multiple times", s.Name)
}
name := strings.Replace(s.Name, "\"\"", ctxt.Pkgpath, -1)
s.Func().FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0)
s.Set(AttrOnList, true)
cmd/internal/obj: stop storing Text flags in From3 Prior to this CL, flags such as NOSPLIT on ATEXT Progs were stored in From3.Offset. Some but not all of those flags were also duplicated into From.Sym.Attribute. This CL migrates all of those flags into From.Sym.Attribute and stops creating a From3. A side-effect of this is that printing an ATEXT Prog can no longer simply dump From3.Offset. That's kind of good, since the raw flag value wasn't very informative anyway, but it did necessitate a bunch of updates to the cmd/asm tests. The reason I'm doing this work now is that avoiding storing flags in both From.Sym and From3.Offset simplifies some other changes to fix the data race first described in CL 40254. This CL almost passes toolstash-check -all. The only changes are in cases where the assembler has decided that a function's flags may be altered, e.g. to make a function with no calls in it NOSPLIT. Prior to this CL, that information was not printed. Sample before: "".Ctz64 t=1 size=63 args=0x10 locals=0x0 0x0000 00000 (/Users/josh/go/tip/src/runtime/internal/sys/intrinsics.go:35) TEXT "".Ctz64(SB), $0-16 0x0000 00000 (/Users/josh/go/tip/src/runtime/internal/sys/intrinsics.go:35) FUNCDATA $0, gclocals·f207267fbf96a0178e8758c6e3e0ce28(SB) Sample after: "".Ctz64 t=1 nosplit size=63 args=0x10 locals=0x0 0x0000 00000 (/Users/josh/go/tip/src/runtime/internal/sys/intrinsics.go:35) TEXT "".Ctz64(SB), NOSPLIT, $0-16 0x0000 00000 (/Users/josh/go/tip/src/runtime/internal/sys/intrinsics.go:35) FUNCDATA $0, gclocals·f207267fbf96a0178e8758c6e3e0ce28(SB) Observe the additional "nosplit" in the first line and the additional "NOSPLIT" in the second line. Updates #15756 Change-Id: I5c59bd8f3bdc7c780361f801d94a261f0aef3d13 Reviewed-on: https://go-review.googlesource.com/40495 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-04-11 15:15:04 -07:00
s.Set(AttrDuplicateOK, flag&DUPOK != 0)
s.Set(AttrNoSplit, flag&NOSPLIT != 0)
s.Set(AttrReflectMethod, flag&REFLECTMETHOD != 0)
s.Set(AttrWrapper, flag&WRAPPER != 0)
s.Set(AttrNeedCtxt, flag&NEEDCTXT != 0)
s.Set(AttrNoFrame, flag&NOFRAME != 0)
s.Set(AttrTopFrame, flag&TOPFRAME != 0)
s.Type = objabi.STEXT
ctxt.Text = append(ctxt.Text, s)
// Set up DWARF entries for s
ctxt.dwarfSym(s)
}
func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
if s.OnList() {
ctxt.Diag("symbol %s listed multiple times", s.Name)
}
s.Set(AttrOnList, true)
ctxt.Data = append(ctxt.Data, s)
s.Size = size
if s.Type == 0 {
s.Type = objabi.SBSS
}
if flag&DUPOK != 0 {
s.Set(AttrDuplicateOK, true)
}
if flag&RODATA != 0 {
s.Type = objabi.SRODATA
} else if flag&NOPTR != 0 {
if s.Type == objabi.SDATA {
s.Type = objabi.SNOPTRDATA
} else {
s.Type = objabi.SNOPTRBSS
}
} else if flag&TLSBSS != 0 {
s.Type = objabi.STLSBSS
}
if strings.HasPrefix(s.Name, "\"\"."+StaticNamePref) {
s.Set(AttrStatic, true)
}
}
// EmitEntryLiveness generates PCDATA Progs after p to switch to the
// liveness map active at the entry of function s. It returns the last
// Prog generated.
func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
pcdata := ctxt.EmitEntryStackMap(s, p, newprog)
pcdata = ctxt.EmitEntryUnsafePoint(s, pcdata, newprog)
return pcdata
}
// Similar to EmitEntryLiveness, but just emit stack map.
func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
pcdata := Appendp(p, newprog)
pcdata.Pos = s.Func().Text.Pos
pcdata.As = APCDATA
pcdata.From.Type = TYPE_CONST
pcdata.From.Offset = objabi.PCDATA_StackMapIndex
pcdata.To.Type = TYPE_CONST
pcdata.To.Offset = -1 // pcdata starts at -1 at function entry
return pcdata
}
// Similar to EmitEntryLiveness, but just emit unsafe point map.
func (ctxt *Link) EmitEntryUnsafePoint(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
pcdata := Appendp(p, newprog)
pcdata.Pos = s.Func().Text.Pos
cmd/compile, cmd/internal/obj: record register maps in binary This adds FUNCDATA and PCDATA that records the register maps much like the existing live arguments maps and live locals maps. The register map is indexed independently from the argument and locals maps since changes in register liveness tend not to correlate with changes to argument and local liveness. This is the final CL toward adding safe-points everywhere. The following CLs will optimize liveness analysis to bring down the cost. The effect of this CL is: name old time/op new time/op delta Template 195ms ± 2% 197ms ± 1% ~ (p=0.136 n=9+9) Unicode 98.4ms ± 2% 99.7ms ± 1% +1.39% (p=0.004 n=10+10) GoTypes 685ms ± 1% 700ms ± 1% +2.06% (p=0.000 n=9+9) Compiler 3.28s ± 1% 3.34s ± 0% +1.71% (p=0.000 n=9+8) SSA 7.79s ± 1% 7.91s ± 1% +1.55% (p=0.000 n=10+9) Flate 133ms ± 2% 133ms ± 2% ~ (p=0.190 n=10+10) GoParser 161ms ± 2% 164ms ± 3% +1.83% (p=0.015 n=10+10) Reflect 450ms ± 1% 457ms ± 1% +1.62% (p=0.000 n=10+10) Tar 183ms ± 2% 185ms ± 1% +0.91% (p=0.008 n=9+10) XML 234ms ± 1% 238ms ± 1% +1.60% (p=0.000 n=9+9) [Geo mean] 411ms 417ms +1.40% name old exe-bytes new exe-bytes delta HelloSize 1.47M ± 0% 1.51M ± 0% +2.79% (p=0.000 n=10+10) Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps is: name old time/op new time/op delta Template 185ms ± 2% 197ms ± 1% +6.42% (p=0.000 n=10+9) Unicode 96.3ms ± 3% 99.7ms ± 1% +3.60% (p=0.000 n=10+10) GoTypes 658ms ± 0% 700ms ± 1% +6.37% (p=0.000 n=10+9) Compiler 3.14s ± 1% 3.34s ± 0% +6.53% (p=0.000 n=9+8) SSA 7.41s ± 2% 7.91s ± 1% +6.71% (p=0.000 n=9+9) Flate 126ms ± 1% 133ms ± 2% +6.15% (p=0.000 n=10+10) GoParser 153ms ± 1% 164ms ± 3% +6.89% (p=0.000 n=10+10) Reflect 437ms ± 1% 457ms ± 1% +4.59% (p=0.000 n=10+10) Tar 178ms ± 1% 185ms ± 1% +4.18% (p=0.000 n=10+10) XML 223ms ± 1% 238ms ± 1% +6.39% (p=0.000 n=10+9) [Geo mean] 394ms 417ms +5.78% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 38.0MB ± 0% +10.19% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 30.3MB ± 0% +3.56% (p=0.000 n=8+9) GoTypes 113MB ± 0% 125MB ± 0% +10.89% (p=0.000 n=10+10) Compiler 510MB ± 0% 575MB ± 0% +12.79% (p=0.000 n=10+10) SSA 1.46GB ± 0% 1.64GB ± 0% +12.40% (p=0.000 n=10+10) Flate 23.9MB ± 0% 25.9MB ± 0% +8.56% (p=0.000 n=10+10) GoParser 28.0MB ± 0% 30.8MB ± 0% +10.08% (p=0.000 n=10+10) Reflect 77.6MB ± 0% 84.3MB ± 0% +8.63% (p=0.000 n=10+10) Tar 34.1MB ± 0% 37.0MB ± 0% +8.44% (p=0.000 n=10+10) XML 42.7MB ± 0% 47.2MB ± 0% +10.75% (p=0.000 n=10+10) [Geo mean] 76.0MB 83.3MB +9.60% name old allocs/op new allocs/op delta Template 321k ± 0% 337k ± 0% +4.98% (p=0.000 n=10+10) Unicode 337k ± 0% 340k ± 0% +1.04% (p=0.000 n=10+9) GoTypes 1.13M ± 0% 1.18M ± 0% +4.85% (p=0.000 n=10+10) Compiler 4.67M ± 0% 4.96M ± 0% +6.25% (p=0.000 n=10+10) SSA 11.7M ± 0% 12.3M ± 0% +5.69% (p=0.000 n=10+10) Flate 216k ± 0% 226k ± 0% +4.52% (p=0.000 n=10+9) GoParser 271k ± 0% 283k ± 0% +4.52% (p=0.000 n=10+10) Reflect 927k ± 0% 972k ± 0% +4.78% (p=0.000 n=10+10) Tar 318k ± 0% 333k ± 0% +4.56% (p=0.000 n=10+10) XML 376k ± 0% 395k ± 0% +5.04% (p=0.000 n=10+10) [Geo mean] 730k 764k +4.61% name old exe-bytes new exe-bytes delta HelloSize 1.46M ± 0% 1.51M ± 0% +3.66% (p=0.000 n=10+10) For #24543. Change-Id: I91e003dc64151916b384274884bf02a2d6862547 Reviewed-on: https://go-review.googlesource.com/109353 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-03-27 15:50:45 -04:00
pcdata.As = APCDATA
pcdata.From.Type = TYPE_CONST
pcdata.From.Offset = objabi.PCDATA_UnsafePoint
cmd/compile, cmd/internal/obj: record register maps in binary This adds FUNCDATA and PCDATA that records the register maps much like the existing live arguments maps and live locals maps. The register map is indexed independently from the argument and locals maps since changes in register liveness tend not to correlate with changes to argument and local liveness. This is the final CL toward adding safe-points everywhere. The following CLs will optimize liveness analysis to bring down the cost. The effect of this CL is: name old time/op new time/op delta Template 195ms ± 2% 197ms ± 1% ~ (p=0.136 n=9+9) Unicode 98.4ms ± 2% 99.7ms ± 1% +1.39% (p=0.004 n=10+10) GoTypes 685ms ± 1% 700ms ± 1% +2.06% (p=0.000 n=9+9) Compiler 3.28s ± 1% 3.34s ± 0% +1.71% (p=0.000 n=9+8) SSA 7.79s ± 1% 7.91s ± 1% +1.55% (p=0.000 n=10+9) Flate 133ms ± 2% 133ms ± 2% ~ (p=0.190 n=10+10) GoParser 161ms ± 2% 164ms ± 3% +1.83% (p=0.015 n=10+10) Reflect 450ms ± 1% 457ms ± 1% +1.62% (p=0.000 n=10+10) Tar 183ms ± 2% 185ms ± 1% +0.91% (p=0.008 n=9+10) XML 234ms ± 1% 238ms ± 1% +1.60% (p=0.000 n=9+9) [Geo mean] 411ms 417ms +1.40% name old exe-bytes new exe-bytes delta HelloSize 1.47M ± 0% 1.51M ± 0% +2.79% (p=0.000 n=10+10) Compared to just before "cmd/internal/obj: consolidate emitting entry stack map", the cumulative effect of adding stack maps everywhere and register maps is: name old time/op new time/op delta Template 185ms ± 2% 197ms ± 1% +6.42% (p=0.000 n=10+9) Unicode 96.3ms ± 3% 99.7ms ± 1% +3.60% (p=0.000 n=10+10) GoTypes 658ms ± 0% 700ms ± 1% +6.37% (p=0.000 n=10+9) Compiler 3.14s ± 1% 3.34s ± 0% +6.53% (p=0.000 n=9+8) SSA 7.41s ± 2% 7.91s ± 1% +6.71% (p=0.000 n=9+9) Flate 126ms ± 1% 133ms ± 2% +6.15% (p=0.000 n=10+10) GoParser 153ms ± 1% 164ms ± 3% +6.89% (p=0.000 n=10+10) Reflect 437ms ± 1% 457ms ± 1% +4.59% (p=0.000 n=10+10) Tar 178ms ± 1% 185ms ± 1% +4.18% (p=0.000 n=10+10) XML 223ms ± 1% 238ms ± 1% +6.39% (p=0.000 n=10+9) [Geo mean] 394ms 417ms +5.78% name old alloc/op new alloc/op delta Template 34.5MB ± 0% 38.0MB ± 0% +10.19% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 30.3MB ± 0% +3.56% (p=0.000 n=8+9) GoTypes 113MB ± 0% 125MB ± 0% +10.89% (p=0.000 n=10+10) Compiler 510MB ± 0% 575MB ± 0% +12.79% (p=0.000 n=10+10) SSA 1.46GB ± 0% 1.64GB ± 0% +12.40% (p=0.000 n=10+10) Flate 23.9MB ± 0% 25.9MB ± 0% +8.56% (p=0.000 n=10+10) GoParser 28.0MB ± 0% 30.8MB ± 0% +10.08% (p=0.000 n=10+10) Reflect 77.6MB ± 0% 84.3MB ± 0% +8.63% (p=0.000 n=10+10) Tar 34.1MB ± 0% 37.0MB ± 0% +8.44% (p=0.000 n=10+10) XML 42.7MB ± 0% 47.2MB ± 0% +10.75% (p=0.000 n=10+10) [Geo mean] 76.0MB 83.3MB +9.60% name old allocs/op new allocs/op delta Template 321k ± 0% 337k ± 0% +4.98% (p=0.000 n=10+10) Unicode 337k ± 0% 340k ± 0% +1.04% (p=0.000 n=10+9) GoTypes 1.13M ± 0% 1.18M ± 0% +4.85% (p=0.000 n=10+10) Compiler 4.67M ± 0% 4.96M ± 0% +6.25% (p=0.000 n=10+10) SSA 11.7M ± 0% 12.3M ± 0% +5.69% (p=0.000 n=10+10) Flate 216k ± 0% 226k ± 0% +4.52% (p=0.000 n=10+9) GoParser 271k ± 0% 283k ± 0% +4.52% (p=0.000 n=10+10) Reflect 927k ± 0% 972k ± 0% +4.78% (p=0.000 n=10+10) Tar 318k ± 0% 333k ± 0% +4.56% (p=0.000 n=10+10) XML 376k ± 0% 395k ± 0% +5.04% (p=0.000 n=10+10) [Geo mean] 730k 764k +4.61% name old exe-bytes new exe-bytes delta HelloSize 1.46M ± 0% 1.51M ± 0% +3.66% (p=0.000 n=10+10) For #24543. Change-Id: I91e003dc64151916b384274884bf02a2d6862547 Reviewed-on: https://go-review.googlesource.com/109353 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-03-27 15:50:45 -04:00
pcdata.To.Type = TYPE_CONST
pcdata.To.Offset = -1
return pcdata
}
// StartUnsafePoint generates PCDATA Progs after p to mark the
// beginning of an unsafe point. The unsafe point starts immediately
// after p.
// It returns the last Prog generated.
func (ctxt *Link) StartUnsafePoint(p *Prog, newprog ProgAlloc) *Prog {
pcdata := Appendp(p, newprog)
pcdata.As = APCDATA
pcdata.From.Type = TYPE_CONST
pcdata.From.Offset = objabi.PCDATA_UnsafePoint
pcdata.To.Type = TYPE_CONST
pcdata.To.Offset = objabi.PCDATA_UnsafePointUnsafe
return pcdata
}
// EndUnsafePoint generates PCDATA Progs after p to mark the end of an
// unsafe point, restoring the register map index to oldval.
// The unsafe point ends right after p.
// It returns the last Prog generated.
func (ctxt *Link) EndUnsafePoint(p *Prog, newprog ProgAlloc, oldval int64) *Prog {
pcdata := Appendp(p, newprog)
pcdata.As = APCDATA
pcdata.From.Type = TYPE_CONST
pcdata.From.Offset = objabi.PCDATA_UnsafePoint
pcdata.To.Type = TYPE_CONST
pcdata.To.Offset = oldval
return pcdata
}
// MarkUnsafePoints inserts PCDATAs to mark nonpreemptible and restartable
// instruction sequences, based on isUnsafePoint and isRestartable predicate.
// p0 is the start of the instruction stream.
// isUnsafePoint(p) returns true if p is not safe for async preemption.
// isRestartable(p) returns true if we can restart at the start of p (this Prog)
// upon async preemption. (Currently multi-Prog restartable sequence is not
// supported.)
// isRestartable can be nil. In this case it is treated as always returning false.
// If isUnsafePoint(p) and isRestartable(p) are both true, it is treated as
// an unsafe point.
func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, isRestartable func(*Prog) bool) {
if isRestartable == nil {
// Default implementation: nothing is restartable.
isRestartable = func(*Prog) bool { return false }
}
prev := p0
prevPcdata := int64(-1) // entry PC data value
prevRestart := int64(0)
for p := prev.Link; p != nil; p, prev = p.Link, p {
if p.As == APCDATA && p.From.Offset == objabi.PCDATA_UnsafePoint {
prevPcdata = p.To.Offset
continue
}
if prevPcdata == objabi.PCDATA_UnsafePointUnsafe {
continue // already unsafe
}
if isUnsafePoint(p) {
q := ctxt.StartUnsafePoint(prev, newprog)
q.Pc = p.Pc
q.Link = p
// Advance to the end of unsafe point.
for p.Link != nil && isUnsafePoint(p.Link) {
p = p.Link
}
if p.Link == nil {
break // Reached the end, don't bother marking the end
}
p = ctxt.EndUnsafePoint(p, newprog, prevPcdata)
p.Pc = p.Link.Pc
continue
}
if isRestartable(p) {
val := int64(objabi.PCDATA_Restart1)
if val == prevRestart {
val = objabi.PCDATA_Restart2
}
prevRestart = val
q := Appendp(prev, newprog)
q.As = APCDATA
q.From.Type = TYPE_CONST
q.From.Offset = objabi.PCDATA_UnsafePoint
q.To.Type = TYPE_CONST
q.To.Offset = val
q.Pc = p.Pc
q.Link = p
if p.Link == nil {
break // Reached the end, don't bother marking the end
}
if isRestartable(p.Link) {
// Next Prog is also restartable. No need to mark the end
// of this sequence. We'll just go ahead mark the next one.
continue
}
p = Appendp(p, newprog)
p.As = APCDATA
p.From.Type = TYPE_CONST
p.From.Offset = objabi.PCDATA_UnsafePoint
p.To.Type = TYPE_CONST
p.To.Offset = prevPcdata
p.Pc = p.Link.Pc
}
}
}