go/src/cmd/internal/obj/objfile.go

593 lines
18 KiB
Go
Raw Normal View History

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package obj
import (
"cmd/internal/dwarf"
"cmd/internal/objabi"
"cmd/internal/sys"
"fmt"
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
"io"
"sort"
"sync"
)
func (ctxt *Link) writeSymDebug(s *LSym) {
ctxt.writeSymDebugNamed(s, s.Name)
}
func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) {
fmt.Fprintf(ctxt.Bso, "%s ", name)
if s.Type != 0 {
fmt.Fprintf(ctxt.Bso, "%v ", s.Type)
}
if s.Static() {
fmt.Fprint(ctxt.Bso, "static ")
}
if s.DuplicateOK() {
fmt.Fprintf(ctxt.Bso, "dupok ")
}
if s.CFunc() {
fmt.Fprintf(ctxt.Bso, "cfunc ")
}
if s.NoSplit() {
fmt.Fprintf(ctxt.Bso, "nosplit ")
}
if s.TopFrame() {
fmt.Fprintf(ctxt.Bso, "topframe ")
}
fmt.Fprintf(ctxt.Bso, "size=%d", s.Size)
if s.Type == objabi.STEXT {
fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x", uint64(s.Func.Args), uint64(s.Func.Locals))
if s.Leaf() {
fmt.Fprintf(ctxt.Bso, " leaf")
}
}
fmt.Fprintf(ctxt.Bso, "\n")
if s.Type == objabi.STEXT {
for p := s.Func.Text; p != nil; p = p.Link {
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
fmt.Fprintf(ctxt.Bso, "\t%#04x ", uint(int(p.Pc)))
if ctxt.Debugasm > 1 {
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
io.WriteString(ctxt.Bso, p.String())
} else {
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
p.InnermostString(ctxt.Bso)
}
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
fmt.Fprintln(ctxt.Bso)
}
}
for i := 0; i < len(s.P); i += 16 {
fmt.Fprintf(ctxt.Bso, "\t%#04x", uint(i))
j := i
for ; j < i+16 && j < len(s.P); j++ {
fmt.Fprintf(ctxt.Bso, " %02x", s.P[j])
}
for ; j < i+16; j++ {
fmt.Fprintf(ctxt.Bso, " ")
}
fmt.Fprintf(ctxt.Bso, " ")
for j = i; j < i+16 && j < len(s.P); j++ {
c := int(s.P[j])
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
b := byte('.')
if ' ' <= c && c <= 0x7e {
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
b = byte(c)
}
cmd/compile: speed up compiling with -S Compiling with -S was not implemented with performance in mind. It allocates profligately. Compiling with -S is ~58% slower, allocates ~47% more memory, and does ~183% more allocations. compilecmp now uses -S to do finer-grained comparisons between compiler versions, so I now care about its performance. This change picks some of the lowest hanging fruit, mostly by modifying printing routines to print directly to a writer, rather than constructing a string first. I have confirmed that compiling std+cmd with "-gcflags=all=-S -p=1" and CGO_ENABLED=0 yields identical results before/after this change. (-p=1 makes package compilation order deterministic. CGO_ENABLED=0 prevents cgo temp workdirs from showing up in filenames.) Using the -S flag, the compiler performance impact is: name old time/op new time/op delta Template 344ms ± 2% 301ms ± 2% -12.45% (p=0.000 n=22+24) Unicode 136ms ± 3% 121ms ± 3% -11.40% (p=0.000 n=24+25) GoTypes 1.24s ± 5% 1.09s ± 3% -12.58% (p=0.000 n=25+25) Compiler 5.66s ± 4% 5.06s ± 2% -10.56% (p=0.000 n=25+20) SSA 19.9s ± 3% 17.2s ± 4% -13.64% (p=0.000 n=25+25) Flate 212ms ± 2% 188ms ± 2% -11.33% (p=0.000 n=25+24) GoParser 278ms ± 3% 242ms ± 1% -12.84% (p=0.000 n=23+24) Reflect 743ms ± 3% 657ms ± 5% -11.56% (p=0.000 n=24+25) Tar 295ms ± 2% 263ms ± 2% -10.78% (p=0.000 n=25+25) XML 409ms ± 2% 360ms ± 3% -12.03% (p=0.000 n=24+25) [Geo mean] 714ms 629ms -11.92% name old user-time/op new user-time/op delta Template 430ms ± 5% 388ms ± 3% -9.76% (p=0.000 n=21+24) Unicode 202ms ±12% 171ms ± 5% -15.21% (p=0.000 n=25+23) GoTypes 1.58s ± 3% 1.42s ± 3% -9.58% (p=0.000 n=24+24) Compiler 7.42s ± 3% 6.68s ± 8% -9.93% (p=0.000 n=25+25) SSA 26.9s ± 3% 22.9s ± 3% -14.85% (p=0.000 n=25+25) Flate 260ms ± 6% 234ms ± 3% -9.69% (p=0.000 n=23+25) GoParser 354ms ± 1% 296ms ± 3% -16.46% (p=0.000 n=23+25) Reflect 953ms ± 2% 865ms ± 4% -9.14% (p=0.000 n=24+24) Tar 380ms ± 2% 348ms ± 2% -8.28% (p=0.000 n=25+22) XML 530ms ± 3% 451ms ± 3% -15.01% (p=0.000 n=24+23) [Geo mean] 929ms 819ms -11.84% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 44.3MB ± 0% -18.24% (p=0.000 n=24+24) Unicode 33.5MB ± 0% 30.6MB ± 0% -8.57% (p=0.000 n=25+25) GoTypes 189MB ± 0% 152MB ± 0% -19.55% (p=0.000 n=25+23) Compiler 875MB ± 0% 703MB ± 0% -19.70% (p=0.000 n=25+25) SSA 3.19GB ± 0% 2.51GB ± 0% -21.50% (p=0.000 n=25+25) Flate 32.9MB ± 0% 27.3MB ± 0% -17.04% (p=0.000 n=25+25) GoParser 43.9MB ± 0% 35.1MB ± 0% -20.19% (p=0.000 n=25+25) Reflect 117MB ± 0% 96MB ± 0% -18.22% (p=0.000 n=24+23) Tar 48.6MB ± 0% 40.6MB ± 0% -16.39% (p=0.000 n=25+24) XML 65.7MB ± 0% 53.9MB ± 0% -17.93% (p=0.000 n=25+23) [Geo mean] 118MB 97MB -17.80% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.60M ± 0% -43.90% (p=0.000 n=25+24) Unicode 539k ± 0% 398k ± 0% -26.20% (p=0.000 n=23+25) GoTypes 3.97M ± 0% 2.19M ± 0% -44.90% (p=0.000 n=25+24) Compiler 17.6M ± 0% 9.5M ± 0% -46.39% (p=0.000 n=22+23) SSA 66.1M ± 0% 34.1M ± 0% -48.41% (p=0.000 n=25+22) Flate 629k ± 0% 365k ± 0% -41.95% (p=0.000 n=25+25) GoParser 929k ± 0% 500k ± 0% -46.11% (p=0.000 n=25+25) Reflect 2.49M ± 0% 1.47M ± 0% -41.00% (p=0.000 n=24+25) Tar 919k ± 0% 534k ± 0% -41.94% (p=0.000 n=25+24) XML 1.28M ± 0% 0.71M ± 0% -44.72% (p=0.000 n=25+24) [Geo mean] 2.32M 1.33M -42.82% This change also speeds up cmd/objdump a modest amount, ~4%. Change-Id: I7c7aa2b365688bc44b3ef6e1d03bcf934699cabc Reviewed-on: https://go-review.googlesource.com/c/go/+/216857 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2020-01-26 09:59:25 -08:00
ctxt.Bso.WriteByte(b)
}
fmt.Fprintf(ctxt.Bso, "\n")
}
sort.Sort(relocByOff(s.R)) // generate stable output
for _, r := range s.R {
name := ""
if r.Sym != nil {
name = r.Sym.Name
} else if r.Type == objabi.R_TLS_LE {
name = "TLS"
}
if ctxt.Arch.InFamily(sys.ARM, sys.PPC64) {
fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s+%x\n", int(r.Off), r.Siz, r.Type, name, uint64(r.Add))
} else {
fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s+%d\n", int(r.Off), r.Siz, r.Type, name, r.Add)
}
}
}
// relocByOff sorts relocations by their offsets.
type relocByOff []Reloc
func (x relocByOff) Len() int { return len(x) }
func (x relocByOff) Less(i, j int) bool { return x[i].Off < x[j].Off }
func (x relocByOff) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
// implement dwarf.Context
type dwCtxt struct{ *Link }
func (c dwCtxt) PtrSize() int {
return c.Arch.PtrSize
}
func (c dwCtxt) AddInt(s dwarf.Sym, size int, i int64) {
ls := s.(*LSym)
ls.WriteInt(c.Link, ls.Size, size, i)
}
func (c dwCtxt) AddUint16(s dwarf.Sym, i uint16) {
c.AddInt(s, 2, int64(i))
}
func (c dwCtxt) AddUint8(s dwarf.Sym, i uint8) {
b := []byte{byte(i)}
c.AddBytes(s, b)
}
func (c dwCtxt) AddBytes(s dwarf.Sym, b []byte) {
ls := s.(*LSym)
ls.WriteBytes(c.Link, ls.Size, b)
}
func (c dwCtxt) AddString(s dwarf.Sym, v string) {
ls := s.(*LSym)
ls.WriteString(c.Link, ls.Size, len(v), v)
ls.WriteInt(c.Link, ls.Size, 1, 0)
}
func (c dwCtxt) AddAddress(s dwarf.Sym, data interface{}, value int64) {
ls := s.(*LSym)
size := c.PtrSize()
if data != nil {
rsym := data.(*LSym)
ls.WriteAddr(c.Link, ls.Size, size, rsym, value)
} else {
ls.WriteInt(c.Link, ls.Size, size, value)
}
}
func (c dwCtxt) AddCURelativeAddress(s dwarf.Sym, data interface{}, value int64) {
ls := s.(*LSym)
rsym := data.(*LSym)
ls.WriteCURelativeAddr(c.Link, ls.Size, rsym, value)
}
func (c dwCtxt) AddSectionOffset(s dwarf.Sym, size int, t interface{}, ofs int64) {
panic("should be used only in the linker")
}
func (c dwCtxt) AddDWARFAddrSectionOffset(s dwarf.Sym, t interface{}, ofs int64) {
size := 4
if isDwarf64(c.Link) {
size = 8
}
ls := s.(*LSym)
rsym := t.(*LSym)
ls.WriteAddr(c.Link, ls.Size, size, rsym, ofs)
r := &ls.R[len(ls.R)-1]
r.Type = objabi.R_DWARFSECREF
}
func (c dwCtxt) AddFileRef(s dwarf.Sym, f interface{}) {
ls := s.(*LSym)
rsym := f.(*LSym)
fidx := c.Link.PosTable.FileIndex(rsym.Name)
// Note the +1 here -- the value we're writing is going to be an
// index into the DWARF line table file section, whose entries
// are numbered starting at 1, not 0.
ls.WriteInt(c.Link, ls.Size, 4, int64(fidx+1))
}
func (c dwCtxt) CurrentOffset(s dwarf.Sym) int64 {
ls := s.(*LSym)
return ls.Size
}
// Here "from" is a symbol corresponding to an inlined or concrete
// function, "to" is the symbol for the corresponding abstract
// function, and "dclIdx" is the index of the symbol of interest with
// respect to the Dcl slice of the original pre-optimization version
// of the inlined function.
func (c dwCtxt) RecordDclReference(from dwarf.Sym, to dwarf.Sym, dclIdx int, inlIndex int) {
ls := from.(*LSym)
tls := to.(*LSym)
ridx := len(ls.R) - 1
c.Link.DwFixups.ReferenceChildDIE(ls, ridx, tls, dclIdx, inlIndex)
}
func (c dwCtxt) RecordChildDieOffsets(s dwarf.Sym, vars []*dwarf.Var, offsets []int32) {
ls := s.(*LSym)
c.Link.DwFixups.RegisterChildDIEOffsets(ls, vars, offsets)
}
func (c dwCtxt) Logf(format string, args ...interface{}) {
c.Link.Logf(format, args...)
}
func isDwarf64(ctxt *Link) bool {
return ctxt.Headtype == objabi.Haix
}
cmd/compile: remove isStmt symbol from FuncInfo As promised in CL 188238, removing the obsolete symbol. Here are the latest stats. This is baselined at "e53edafb66" with only these changes applied, run on magna.cam. The linker looks straight better (in memory and speed). There is still a change I'm working on walking the progs to generate the debug_lines data in the compiler. That will likely result in a compiler speedup. name old time/op new time/op delta Template 324ms ± 3% 317ms ± 3% -2.07% (p=0.043 n=10+10) Unicode 142ms ± 4% 144ms ± 3% ~ (p=0.393 n=10+10) GoTypes 1.05s ± 2% 1.07s ± 2% +1.59% (p=0.019 n=9+9) Compiler 4.09s ± 2% 4.11s ± 1% ~ (p=0.218 n=10+10) SSA 12.5s ± 1% 12.7s ± 1% +1.00% (p=0.035 n=10+10) Flate 199ms ± 7% 203ms ± 5% ~ (p=0.481 n=10+10) GoParser 245ms ± 3% 246ms ± 5% ~ (p=0.780 n=9+10) Reflect 672ms ± 4% 688ms ± 3% +2.42% (p=0.015 n=10+10) Tar 280ms ± 4% 284ms ± 4% ~ (p=0.123 n=10+10) XML 379ms ± 4% 381ms ± 2% ~ (p=0.529 n=10+10) LinkCompiler 1.16s ± 4% 1.12s ± 2% -3.03% (p=0.001 n=10+9) ExternalLinkCompiler 2.28s ± 3% 2.23s ± 3% -2.51% (p=0.011 n=8+9) LinkWithoutDebugCompiler 686ms ± 9% 667ms ± 2% ~ (p=0.277 n=9+8) StdCmd 14.1s ± 1% 14.0s ± 1% ~ (p=0.739 n=10+10) name old user-time/op new user-time/op delta Template 604ms ±23% 564ms ± 7% ~ (p=0.661 n=10+9) Unicode 429ms ±40% 418ms ±37% ~ (p=0.579 n=10+10) GoTypes 2.43s ±12% 2.51s ± 7% ~ (p=0.393 n=10+10) Compiler 9.22s ± 3% 9.27s ± 3% ~ (p=0.720 n=9+10) SSA 26.3s ± 3% 26.6s ± 2% ~ (p=0.579 n=10+10) Flate 328ms ±19% 333ms ±12% ~ (p=0.842 n=10+9) GoParser 387ms ± 5% 378ms ± 9% ~ (p=0.356 n=9+10) Reflect 1.36s ±20% 1.43s ±21% ~ (p=0.631 n=10+10) Tar 469ms ±12% 471ms ±21% ~ (p=0.497 n=9+10) XML 685ms ±18% 698ms ±19% ~ (p=0.739 n=10+10) LinkCompiler 1.86s ±10% 1.87s ±11% ~ (p=0.968 n=10+9) ExternalLinkCompiler 3.20s ±13% 3.01s ± 8% -5.70% (p=0.046 n=8+9) LinkWithoutDebugCompiler 1.08s ±15% 1.09s ±20% ~ (p=0.579 n=10+10) name old alloc/op new alloc/op delta Template 36.3MB ± 0% 36.4MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 28.5MB ± 0% 28.5MB ± 0% ~ (p=0.165 n=10+10) GoTypes 120MB ± 0% 121MB ± 0% +0.29% (p=0.000 n=9+10) Compiler 546MB ± 0% 548MB ± 0% +0.32% (p=0.000 n=10+10) SSA 1.84GB ± 0% 1.85GB ± 0% +0.49% (p=0.000 n=10+10) Flate 22.9MB ± 0% 23.0MB ± 0% +0.25% (p=0.000 n=10+10) GoParser 27.8MB ± 0% 27.9MB ± 0% +0.25% (p=0.000 n=10+8) Reflect 77.5MB ± 0% 77.7MB ± 0% +0.27% (p=0.000 n=9+9) Tar 34.5MB ± 0% 34.6MB ± 0% +0.23% (p=0.000 n=10+10) XML 44.2MB ± 0% 44.4MB ± 0% +0.32% (p=0.000 n=10+10) LinkCompiler 239MB ± 0% 230MB ± 0% -3.86% (p=0.000 n=10+10) ExternalLinkCompiler 243MB ± 0% 243MB ± 0% +0.22% (p=0.000 n=10+10) LinkWithoutDebugCompiler 164MB ± 0% 155MB ± 0% -5.45% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 371k ± 0% 372k ± 0% +0.44% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% +0.05% (p=0.000 n=10+10) GoTypes 1.32M ± 0% 1.32M ± 0% +0.46% (p=0.000 n=10+10) Compiler 5.34M ± 0% 5.37M ± 0% +0.59% (p=0.000 n=10+10) SSA 17.6M ± 0% 17.7M ± 0% +0.63% (p=0.000 n=10+10) Flate 233k ± 0% 234k ± 0% +0.48% (p=0.000 n=10+10) GoParser 309k ± 0% 310k ± 0% +0.40% (p=0.000 n=10+10) Reflect 964k ± 0% 969k ± 0% +0.54% (p=0.000 n=10+10) Tar 346k ± 0% 348k ± 0% +0.48% (p=0.000 n=10+9) XML 424k ± 0% 426k ± 0% +0.51% (p=0.000 n=10+10) LinkCompiler 751k ± 0% 645k ± 0% -14.13% (p=0.000 n=10+10) ExternalLinkCompiler 1.79M ± 0% 1.69M ± 0% -5.30% (p=0.000 n=10+10) LinkWithoutDebugCompiler 217k ± 0% 222k ± 0% +2.02% (p=0.000 n=10+10) name old object-bytes new object-bytes delta Template 547kB ± 0% 559kB ± 0% +2.17% (p=0.000 n=10+10) Unicode 215kB ± 0% 216kB ± 0% +0.60% (p=0.000 n=10+10) GoTypes 1.99MB ± 0% 2.03MB ± 0% +2.02% (p=0.000 n=10+10) Compiler 7.86MB ± 0% 8.07MB ± 0% +2.73% (p=0.000 n=10+10) SSA 26.4MB ± 0% 27.2MB ± 0% +3.27% (p=0.000 n=10+10) Flate 337kB ± 0% 343kB ± 0% +2.02% (p=0.000 n=10+10) GoParser 432kB ± 0% 441kB ± 0% +2.11% (p=0.000 n=10+10) Reflect 1.33MB ± 0% 1.36MB ± 0% +1.87% (p=0.000 n=10+10) Tar 477kB ± 0% 487kB ± 0% +2.24% (p=0.000 n=10+10) XML 617kB ± 0% 632kB ± 0% +2.33% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 109kB ± 0% +0.09% (p=0.000 n=10+10) SSA 137kB ± 0% 137kB ± 0% +0.03% (p=0.000 n=10+10) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 760kB ± 0% 760kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.11MB ± 0% 1.13MB ± 0% +1.10% (p=0.000 n=10+10) CmdGoSize 14.9MB ± 0% 15.0MB ± 0% +0.77% (p=0.000 n=10+10) Change-Id: I42e6087cd6231dbdcfff5464e46d373474e455e1 Reviewed-on: https://go-review.googlesource.com/c/go/+/192417 Reviewed-by: Austin Clements <austin@google.com> Run-TryBot: Austin Clements <austin@google.com>
2019-08-29 16:35:50 -04:00
func (ctxt *Link) dwarfSym(s *LSym) (dwarfInfoSym, dwarfLocSym, dwarfRangesSym, dwarfAbsFnSym, dwarfDebugLines *LSym) {
if s.Type != objabi.STEXT {
ctxt.Diag("dwarfSym of non-TEXT %v", s)
}
[dev.debug] cmd/compile: better DWARF with optimizations on Debuggers use DWARF information to find local variables on the stack and in registers. Prior to this CL, the DWARF information for functions claimed that all variables were on the stack at all times. That's incorrect when optimizations are enabled, and results in debuggers showing data that is out of date or complete gibberish. After this CL, the compiler is capable of representing variable locations more accurately, and attempts to do so. Due to limitations of the SSA backend, it's not possible to be completely correct. There are a number of problems in the current design. One of the easier to understand is that variable names currently must be attached to an SSA value, but not all assignments in the source code actually result in machine code. For example: type myint int var a int b := myint(int) and b := (*uint64)(unsafe.Pointer(a)) don't generate machine code because the underlying representation is the same, so the correct value of b will not be set when the user would expect. Generating the more precise debug information is behind a flag, dwarflocationlists. Because of the issues described above, setting the flag may not make the debugging experience much better, and may actually make it worse in cases where the variable actually is on the stack and the more complicated analysis doesn't realize it. A number of changes are included: - Add a new pseudo-instruction, RegKill, which indicates that the value in the register has been clobbered. - Adjust regalloc to emit RegKills in the right places. Significantly, this means that phis are mixed with StoreReg and RegKills after regalloc. - Track variable decomposition in ssa.LocalSlots. - After the SSA backend is done, analyze the result and build location lists for each LocalSlot. - After assembly is done, update the location lists with the assembled PC offsets, recompose variables, and build DWARF location lists. Emit the list as a new linker symbol, one per function. - In the linker, aggregate the location lists into a .debug_loc section. TODO: - currently disabled for non-X86/AMD64 because there are no data tables. go build -toolexec 'toolstash -cmp' -a std succeeds. With -dwarflocationlists false: before: f02812195637909ff675782c0b46836a8ff01976 after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec benchstat -geomean /tmp/220352263 /tmp/621364410 completed 15 of 15, estimated time remaining 0s (eta 3:52PM) name old time/op new time/op delta Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14) Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15) GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14) Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15) GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15) Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13) Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15) XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15) [Geo mean] 206ms 377ms +82.86% name old user-time/op new user-time/op delta Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15) Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14) GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15) Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15) GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15) Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13) Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15) XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15) [Geo mean] 317ms 583ms +83.72% name old alloc/op new alloc/op delta Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15) Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15) GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14) Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15) GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15) Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15) Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15) XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15) [Geo mean] 42.1MB 75.0MB +78.05% name old allocs/op new allocs/op delta Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15) Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14) GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14) Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15) GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15) Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15) Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15) XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15) [Geo mean] 439k 755k +72.01% name old text-bytes new text-bytes delta HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15) name old data-bytes new data-bytes delta HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal) Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8 Reviewed-on: https://go-review.googlesource.com/41770 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2017-07-21 18:30:19 -04:00
if s.Func.dwarfInfoSym == nil {
s.Func.dwarfInfoSym = &LSym{
Type: objabi.SDWARFINFO,
}
if ctxt.Flag_locationlists {
s.Func.dwarfLocSym = &LSym{
Type: objabi.SDWARFLOC,
}
}
s.Func.dwarfRangesSym = &LSym{
Type: objabi.SDWARFRANGE,
}
s.Func.dwarfDebugLinesSym = &LSym{
Type: objabi.SDWARFLINES,
[dev.debug] cmd/compile: better DWARF with optimizations on Debuggers use DWARF information to find local variables on the stack and in registers. Prior to this CL, the DWARF information for functions claimed that all variables were on the stack at all times. That's incorrect when optimizations are enabled, and results in debuggers showing data that is out of date or complete gibberish. After this CL, the compiler is capable of representing variable locations more accurately, and attempts to do so. Due to limitations of the SSA backend, it's not possible to be completely correct. There are a number of problems in the current design. One of the easier to understand is that variable names currently must be attached to an SSA value, but not all assignments in the source code actually result in machine code. For example: type myint int var a int b := myint(int) and b := (*uint64)(unsafe.Pointer(a)) don't generate machine code because the underlying representation is the same, so the correct value of b will not be set when the user would expect. Generating the more precise debug information is behind a flag, dwarflocationlists. Because of the issues described above, setting the flag may not make the debugging experience much better, and may actually make it worse in cases where the variable actually is on the stack and the more complicated analysis doesn't realize it. A number of changes are included: - Add a new pseudo-instruction, RegKill, which indicates that the value in the register has been clobbered. - Adjust regalloc to emit RegKills in the right places. Significantly, this means that phis are mixed with StoreReg and RegKills after regalloc. - Track variable decomposition in ssa.LocalSlots. - After the SSA backend is done, analyze the result and build location lists for each LocalSlot. - After assembly is done, update the location lists with the assembled PC offsets, recompose variables, and build DWARF location lists. Emit the list as a new linker symbol, one per function. - In the linker, aggregate the location lists into a .debug_loc section. TODO: - currently disabled for non-X86/AMD64 because there are no data tables. go build -toolexec 'toolstash -cmp' -a std succeeds. With -dwarflocationlists false: before: f02812195637909ff675782c0b46836a8ff01976 after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec benchstat -geomean /tmp/220352263 /tmp/621364410 completed 15 of 15, estimated time remaining 0s (eta 3:52PM) name old time/op new time/op delta Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14) Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15) GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14) Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15) GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15) Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13) Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15) XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15) [Geo mean] 206ms 377ms +82.86% name old user-time/op new user-time/op delta Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15) Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14) GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15) Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15) GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15) Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13) Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15) XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15) [Geo mean] 317ms 583ms +83.72% name old alloc/op new alloc/op delta Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15) Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15) GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14) Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15) GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15) Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15) Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15) XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15) [Geo mean] 42.1MB 75.0MB +78.05% name old allocs/op new allocs/op delta Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15) Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14) GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14) Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15) GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15) Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15) Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15) XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15) [Geo mean] 439k 755k +72.01% name old text-bytes new text-bytes delta HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15) name old data-bytes new data-bytes delta HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal) Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8 Reviewed-on: https://go-review.googlesource.com/41770 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2017-07-21 18:30:19 -04:00
}
if s.WasInlined() {
s.Func.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s)
}
}
cmd/compile: remove isStmt symbol from FuncInfo As promised in CL 188238, removing the obsolete symbol. Here are the latest stats. This is baselined at "e53edafb66" with only these changes applied, run on magna.cam. The linker looks straight better (in memory and speed). There is still a change I'm working on walking the progs to generate the debug_lines data in the compiler. That will likely result in a compiler speedup. name old time/op new time/op delta Template 324ms ± 3% 317ms ± 3% -2.07% (p=0.043 n=10+10) Unicode 142ms ± 4% 144ms ± 3% ~ (p=0.393 n=10+10) GoTypes 1.05s ± 2% 1.07s ± 2% +1.59% (p=0.019 n=9+9) Compiler 4.09s ± 2% 4.11s ± 1% ~ (p=0.218 n=10+10) SSA 12.5s ± 1% 12.7s ± 1% +1.00% (p=0.035 n=10+10) Flate 199ms ± 7% 203ms ± 5% ~ (p=0.481 n=10+10) GoParser 245ms ± 3% 246ms ± 5% ~ (p=0.780 n=9+10) Reflect 672ms ± 4% 688ms ± 3% +2.42% (p=0.015 n=10+10) Tar 280ms ± 4% 284ms ± 4% ~ (p=0.123 n=10+10) XML 379ms ± 4% 381ms ± 2% ~ (p=0.529 n=10+10) LinkCompiler 1.16s ± 4% 1.12s ± 2% -3.03% (p=0.001 n=10+9) ExternalLinkCompiler 2.28s ± 3% 2.23s ± 3% -2.51% (p=0.011 n=8+9) LinkWithoutDebugCompiler 686ms ± 9% 667ms ± 2% ~ (p=0.277 n=9+8) StdCmd 14.1s ± 1% 14.0s ± 1% ~ (p=0.739 n=10+10) name old user-time/op new user-time/op delta Template 604ms ±23% 564ms ± 7% ~ (p=0.661 n=10+9) Unicode 429ms ±40% 418ms ±37% ~ (p=0.579 n=10+10) GoTypes 2.43s ±12% 2.51s ± 7% ~ (p=0.393 n=10+10) Compiler 9.22s ± 3% 9.27s ± 3% ~ (p=0.720 n=9+10) SSA 26.3s ± 3% 26.6s ± 2% ~ (p=0.579 n=10+10) Flate 328ms ±19% 333ms ±12% ~ (p=0.842 n=10+9) GoParser 387ms ± 5% 378ms ± 9% ~ (p=0.356 n=9+10) Reflect 1.36s ±20% 1.43s ±21% ~ (p=0.631 n=10+10) Tar 469ms ±12% 471ms ±21% ~ (p=0.497 n=9+10) XML 685ms ±18% 698ms ±19% ~ (p=0.739 n=10+10) LinkCompiler 1.86s ±10% 1.87s ±11% ~ (p=0.968 n=10+9) ExternalLinkCompiler 3.20s ±13% 3.01s ± 8% -5.70% (p=0.046 n=8+9) LinkWithoutDebugCompiler 1.08s ±15% 1.09s ±20% ~ (p=0.579 n=10+10) name old alloc/op new alloc/op delta Template 36.3MB ± 0% 36.4MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 28.5MB ± 0% 28.5MB ± 0% ~ (p=0.165 n=10+10) GoTypes 120MB ± 0% 121MB ± 0% +0.29% (p=0.000 n=9+10) Compiler 546MB ± 0% 548MB ± 0% +0.32% (p=0.000 n=10+10) SSA 1.84GB ± 0% 1.85GB ± 0% +0.49% (p=0.000 n=10+10) Flate 22.9MB ± 0% 23.0MB ± 0% +0.25% (p=0.000 n=10+10) GoParser 27.8MB ± 0% 27.9MB ± 0% +0.25% (p=0.000 n=10+8) Reflect 77.5MB ± 0% 77.7MB ± 0% +0.27% (p=0.000 n=9+9) Tar 34.5MB ± 0% 34.6MB ± 0% +0.23% (p=0.000 n=10+10) XML 44.2MB ± 0% 44.4MB ± 0% +0.32% (p=0.000 n=10+10) LinkCompiler 239MB ± 0% 230MB ± 0% -3.86% (p=0.000 n=10+10) ExternalLinkCompiler 243MB ± 0% 243MB ± 0% +0.22% (p=0.000 n=10+10) LinkWithoutDebugCompiler 164MB ± 0% 155MB ± 0% -5.45% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 371k ± 0% 372k ± 0% +0.44% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% +0.05% (p=0.000 n=10+10) GoTypes 1.32M ± 0% 1.32M ± 0% +0.46% (p=0.000 n=10+10) Compiler 5.34M ± 0% 5.37M ± 0% +0.59% (p=0.000 n=10+10) SSA 17.6M ± 0% 17.7M ± 0% +0.63% (p=0.000 n=10+10) Flate 233k ± 0% 234k ± 0% +0.48% (p=0.000 n=10+10) GoParser 309k ± 0% 310k ± 0% +0.40% (p=0.000 n=10+10) Reflect 964k ± 0% 969k ± 0% +0.54% (p=0.000 n=10+10) Tar 346k ± 0% 348k ± 0% +0.48% (p=0.000 n=10+9) XML 424k ± 0% 426k ± 0% +0.51% (p=0.000 n=10+10) LinkCompiler 751k ± 0% 645k ± 0% -14.13% (p=0.000 n=10+10) ExternalLinkCompiler 1.79M ± 0% 1.69M ± 0% -5.30% (p=0.000 n=10+10) LinkWithoutDebugCompiler 217k ± 0% 222k ± 0% +2.02% (p=0.000 n=10+10) name old object-bytes new object-bytes delta Template 547kB ± 0% 559kB ± 0% +2.17% (p=0.000 n=10+10) Unicode 215kB ± 0% 216kB ± 0% +0.60% (p=0.000 n=10+10) GoTypes 1.99MB ± 0% 2.03MB ± 0% +2.02% (p=0.000 n=10+10) Compiler 7.86MB ± 0% 8.07MB ± 0% +2.73% (p=0.000 n=10+10) SSA 26.4MB ± 0% 27.2MB ± 0% +3.27% (p=0.000 n=10+10) Flate 337kB ± 0% 343kB ± 0% +2.02% (p=0.000 n=10+10) GoParser 432kB ± 0% 441kB ± 0% +2.11% (p=0.000 n=10+10) Reflect 1.33MB ± 0% 1.36MB ± 0% +1.87% (p=0.000 n=10+10) Tar 477kB ± 0% 487kB ± 0% +2.24% (p=0.000 n=10+10) XML 617kB ± 0% 632kB ± 0% +2.33% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 109kB ± 0% +0.09% (p=0.000 n=10+10) SSA 137kB ± 0% 137kB ± 0% +0.03% (p=0.000 n=10+10) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 760kB ± 0% 760kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.11MB ± 0% 1.13MB ± 0% +1.10% (p=0.000 n=10+10) CmdGoSize 14.9MB ± 0% 15.0MB ± 0% +0.77% (p=0.000 n=10+10) Change-Id: I42e6087cd6231dbdcfff5464e46d373474e455e1 Reviewed-on: https://go-review.googlesource.com/c/go/+/192417 Reviewed-by: Austin Clements <austin@google.com> Run-TryBot: Austin Clements <austin@google.com>
2019-08-29 16:35:50 -04:00
return s.Func.dwarfInfoSym, s.Func.dwarfLocSym, s.Func.dwarfRangesSym, s.Func.dwarfAbsFnSym, s.Func.dwarfDebugLinesSym
}
func (s *LSym) Length(dwarfContext interface{}) int64 {
return s.Size
}
// fileSymbol returns a symbol corresponding to the source file of the
// first instruction (prog) of the specified function. This will
// presumably be the file in which the function is defined.
func (ctxt *Link) fileSymbol(fn *LSym) *LSym {
p := fn.Func.Text
if p != nil {
f, _ := linkgetlineFromPos(ctxt, p.Pos)
fsym := ctxt.Lookup(f)
return fsym
}
return nil
}
// populateDWARF fills in the DWARF Debugging Information Entries for
// TEXT symbol 's'. The various DWARF symbols must already have been
// initialized in InitTextSym.
func (ctxt *Link) populateDWARF(curfn interface{}, s *LSym, myimportpath string) {
cmd/compile: remove isStmt symbol from FuncInfo As promised in CL 188238, removing the obsolete symbol. Here are the latest stats. This is baselined at "e53edafb66" with only these changes applied, run on magna.cam. The linker looks straight better (in memory and speed). There is still a change I'm working on walking the progs to generate the debug_lines data in the compiler. That will likely result in a compiler speedup. name old time/op new time/op delta Template 324ms ± 3% 317ms ± 3% -2.07% (p=0.043 n=10+10) Unicode 142ms ± 4% 144ms ± 3% ~ (p=0.393 n=10+10) GoTypes 1.05s ± 2% 1.07s ± 2% +1.59% (p=0.019 n=9+9) Compiler 4.09s ± 2% 4.11s ± 1% ~ (p=0.218 n=10+10) SSA 12.5s ± 1% 12.7s ± 1% +1.00% (p=0.035 n=10+10) Flate 199ms ± 7% 203ms ± 5% ~ (p=0.481 n=10+10) GoParser 245ms ± 3% 246ms ± 5% ~ (p=0.780 n=9+10) Reflect 672ms ± 4% 688ms ± 3% +2.42% (p=0.015 n=10+10) Tar 280ms ± 4% 284ms ± 4% ~ (p=0.123 n=10+10) XML 379ms ± 4% 381ms ± 2% ~ (p=0.529 n=10+10) LinkCompiler 1.16s ± 4% 1.12s ± 2% -3.03% (p=0.001 n=10+9) ExternalLinkCompiler 2.28s ± 3% 2.23s ± 3% -2.51% (p=0.011 n=8+9) LinkWithoutDebugCompiler 686ms ± 9% 667ms ± 2% ~ (p=0.277 n=9+8) StdCmd 14.1s ± 1% 14.0s ± 1% ~ (p=0.739 n=10+10) name old user-time/op new user-time/op delta Template 604ms ±23% 564ms ± 7% ~ (p=0.661 n=10+9) Unicode 429ms ±40% 418ms ±37% ~ (p=0.579 n=10+10) GoTypes 2.43s ±12% 2.51s ± 7% ~ (p=0.393 n=10+10) Compiler 9.22s ± 3% 9.27s ± 3% ~ (p=0.720 n=9+10) SSA 26.3s ± 3% 26.6s ± 2% ~ (p=0.579 n=10+10) Flate 328ms ±19% 333ms ±12% ~ (p=0.842 n=10+9) GoParser 387ms ± 5% 378ms ± 9% ~ (p=0.356 n=9+10) Reflect 1.36s ±20% 1.43s ±21% ~ (p=0.631 n=10+10) Tar 469ms ±12% 471ms ±21% ~ (p=0.497 n=9+10) XML 685ms ±18% 698ms ±19% ~ (p=0.739 n=10+10) LinkCompiler 1.86s ±10% 1.87s ±11% ~ (p=0.968 n=10+9) ExternalLinkCompiler 3.20s ±13% 3.01s ± 8% -5.70% (p=0.046 n=8+9) LinkWithoutDebugCompiler 1.08s ±15% 1.09s ±20% ~ (p=0.579 n=10+10) name old alloc/op new alloc/op delta Template 36.3MB ± 0% 36.4MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 28.5MB ± 0% 28.5MB ± 0% ~ (p=0.165 n=10+10) GoTypes 120MB ± 0% 121MB ± 0% +0.29% (p=0.000 n=9+10) Compiler 546MB ± 0% 548MB ± 0% +0.32% (p=0.000 n=10+10) SSA 1.84GB ± 0% 1.85GB ± 0% +0.49% (p=0.000 n=10+10) Flate 22.9MB ± 0% 23.0MB ± 0% +0.25% (p=0.000 n=10+10) GoParser 27.8MB ± 0% 27.9MB ± 0% +0.25% (p=0.000 n=10+8) Reflect 77.5MB ± 0% 77.7MB ± 0% +0.27% (p=0.000 n=9+9) Tar 34.5MB ± 0% 34.6MB ± 0% +0.23% (p=0.000 n=10+10) XML 44.2MB ± 0% 44.4MB ± 0% +0.32% (p=0.000 n=10+10) LinkCompiler 239MB ± 0% 230MB ± 0% -3.86% (p=0.000 n=10+10) ExternalLinkCompiler 243MB ± 0% 243MB ± 0% +0.22% (p=0.000 n=10+10) LinkWithoutDebugCompiler 164MB ± 0% 155MB ± 0% -5.45% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 371k ± 0% 372k ± 0% +0.44% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% +0.05% (p=0.000 n=10+10) GoTypes 1.32M ± 0% 1.32M ± 0% +0.46% (p=0.000 n=10+10) Compiler 5.34M ± 0% 5.37M ± 0% +0.59% (p=0.000 n=10+10) SSA 17.6M ± 0% 17.7M ± 0% +0.63% (p=0.000 n=10+10) Flate 233k ± 0% 234k ± 0% +0.48% (p=0.000 n=10+10) GoParser 309k ± 0% 310k ± 0% +0.40% (p=0.000 n=10+10) Reflect 964k ± 0% 969k ± 0% +0.54% (p=0.000 n=10+10) Tar 346k ± 0% 348k ± 0% +0.48% (p=0.000 n=10+9) XML 424k ± 0% 426k ± 0% +0.51% (p=0.000 n=10+10) LinkCompiler 751k ± 0% 645k ± 0% -14.13% (p=0.000 n=10+10) ExternalLinkCompiler 1.79M ± 0% 1.69M ± 0% -5.30% (p=0.000 n=10+10) LinkWithoutDebugCompiler 217k ± 0% 222k ± 0% +2.02% (p=0.000 n=10+10) name old object-bytes new object-bytes delta Template 547kB ± 0% 559kB ± 0% +2.17% (p=0.000 n=10+10) Unicode 215kB ± 0% 216kB ± 0% +0.60% (p=0.000 n=10+10) GoTypes 1.99MB ± 0% 2.03MB ± 0% +2.02% (p=0.000 n=10+10) Compiler 7.86MB ± 0% 8.07MB ± 0% +2.73% (p=0.000 n=10+10) SSA 26.4MB ± 0% 27.2MB ± 0% +3.27% (p=0.000 n=10+10) Flate 337kB ± 0% 343kB ± 0% +2.02% (p=0.000 n=10+10) GoParser 432kB ± 0% 441kB ± 0% +2.11% (p=0.000 n=10+10) Reflect 1.33MB ± 0% 1.36MB ± 0% +1.87% (p=0.000 n=10+10) Tar 477kB ± 0% 487kB ± 0% +2.24% (p=0.000 n=10+10) XML 617kB ± 0% 632kB ± 0% +2.33% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 18.5kB ± 0% 18.5kB ± 0% ~ (all equal) Unicode 7.92kB ± 0% 7.92kB ± 0% ~ (all equal) GoTypes 35.0kB ± 0% 35.0kB ± 0% ~ (all equal) Compiler 109kB ± 0% 109kB ± 0% +0.09% (p=0.000 n=10+10) SSA 137kB ± 0% 137kB ± 0% +0.03% (p=0.000 n=10+10) Flate 4.89kB ± 0% 4.89kB ± 0% ~ (all equal) GoParser 8.49kB ± 0% 8.49kB ± 0% ~ (all equal) Reflect 11.4kB ± 0% 11.4kB ± 0% ~ (all equal) Tar 10.5kB ± 0% 10.5kB ± 0% ~ (all equal) XML 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 760kB ± 0% 760kB ± 0% ~ (all equal) CmdGoSize 10.8MB ± 0% 10.8MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.11MB ± 0% 1.13MB ± 0% +1.10% (p=0.000 n=10+10) CmdGoSize 14.9MB ± 0% 15.0MB ± 0% +0.77% (p=0.000 n=10+10) Change-Id: I42e6087cd6231dbdcfff5464e46d373474e455e1 Reviewed-on: https://go-review.googlesource.com/c/go/+/192417 Reviewed-by: Austin Clements <austin@google.com> Run-TryBot: Austin Clements <austin@google.com>
2019-08-29 16:35:50 -04:00
info, loc, ranges, absfunc, lines := ctxt.dwarfSym(s)
[dev.debug] cmd/compile: better DWARF with optimizations on Debuggers use DWARF information to find local variables on the stack and in registers. Prior to this CL, the DWARF information for functions claimed that all variables were on the stack at all times. That's incorrect when optimizations are enabled, and results in debuggers showing data that is out of date or complete gibberish. After this CL, the compiler is capable of representing variable locations more accurately, and attempts to do so. Due to limitations of the SSA backend, it's not possible to be completely correct. There are a number of problems in the current design. One of the easier to understand is that variable names currently must be attached to an SSA value, but not all assignments in the source code actually result in machine code. For example: type myint int var a int b := myint(int) and b := (*uint64)(unsafe.Pointer(a)) don't generate machine code because the underlying representation is the same, so the correct value of b will not be set when the user would expect. Generating the more precise debug information is behind a flag, dwarflocationlists. Because of the issues described above, setting the flag may not make the debugging experience much better, and may actually make it worse in cases where the variable actually is on the stack and the more complicated analysis doesn't realize it. A number of changes are included: - Add a new pseudo-instruction, RegKill, which indicates that the value in the register has been clobbered. - Adjust regalloc to emit RegKills in the right places. Significantly, this means that phis are mixed with StoreReg and RegKills after regalloc. - Track variable decomposition in ssa.LocalSlots. - After the SSA backend is done, analyze the result and build location lists for each LocalSlot. - After assembly is done, update the location lists with the assembled PC offsets, recompose variables, and build DWARF location lists. Emit the list as a new linker symbol, one per function. - In the linker, aggregate the location lists into a .debug_loc section. TODO: - currently disabled for non-X86/AMD64 because there are no data tables. go build -toolexec 'toolstash -cmp' -a std succeeds. With -dwarflocationlists false: before: f02812195637909ff675782c0b46836a8ff01976 after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec benchstat -geomean /tmp/220352263 /tmp/621364410 completed 15 of 15, estimated time remaining 0s (eta 3:52PM) name old time/op new time/op delta Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14) Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15) GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14) Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15) GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15) Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13) Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15) XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15) [Geo mean] 206ms 377ms +82.86% name old user-time/op new user-time/op delta Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15) Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14) GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15) Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15) GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15) Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13) Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15) XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15) [Geo mean] 317ms 583ms +83.72% name old alloc/op new alloc/op delta Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15) Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15) GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14) Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15) GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15) Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15) Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15) XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15) [Geo mean] 42.1MB 75.0MB +78.05% name old allocs/op new allocs/op delta Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15) Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14) GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14) Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15) GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15) Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15) Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15) XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15) [Geo mean] 439k 755k +72.01% name old text-bytes new text-bytes delta HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15) name old data-bytes new data-bytes delta HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal) Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8 Reviewed-on: https://go-review.googlesource.com/41770 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
2017-07-21 18:30:19 -04:00
if info.Size != 0 {
ctxt.Diag("makeFuncDebugEntry double process %v", s)
}
var scopes []dwarf.Scope
var inlcalls dwarf.InlCalls
if ctxt.DebugInfo != nil {
scopes, inlcalls = ctxt.DebugInfo(s, info, curfn)
}
var err error
dwctxt := dwCtxt{ctxt}
filesym := ctxt.fileSymbol(s)
fnstate := &dwarf.FnState{
Name: s.Name,
Importpath: myimportpath,
Info: info,
Filesym: filesym,
Loc: loc,
Ranges: ranges,
Absfn: absfunc,
StartPC: s,
Size: s.Size,
External: !s.Static(),
Scopes: scopes,
InlCalls: inlcalls,
UseBASEntries: ctxt.UseBASEntries,
}
if absfunc != nil {
err = dwarf.PutAbstractFunc(dwctxt, fnstate)
if err != nil {
ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err)
}
err = dwarf.PutConcreteFunc(dwctxt, fnstate)
} else {
err = dwarf.PutDefaultFunc(dwctxt, fnstate)
}
if err != nil {
ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err)
}
// Fill in the debug lines symbol.
ctxt.generateDebugLinesSymbol(s, lines)
}
// DwarfIntConst creates a link symbol for an integer constant with the
// given name, type and value.
func (ctxt *Link) DwarfIntConst(myimportpath, name, typename string, val int64) {
if myimportpath == "" {
return
}
s := ctxt.LookupInit(dwarf.ConstInfoPrefix+myimportpath, func(s *LSym) {
s.Type = objabi.SDWARFINFO
ctxt.Data = append(ctxt.Data, s)
})
dwarf.PutIntConst(dwCtxt{ctxt}, s, ctxt.Lookup(dwarf.InfoPrefix+typename), myimportpath+"."+name, val)
}
func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath string) {
absfn := ctxt.DwFixups.AbsFuncDwarfSym(s)
if absfn.Size != 0 {
ctxt.Diag("internal error: DwarfAbstractFunc double process %v", s)
}
if s.Func == nil {
s.Func = new(FuncInfo)
}
scopes, _ := ctxt.DebugInfo(s, absfn, curfn)
dwctxt := dwCtxt{ctxt}
filesym := ctxt.fileSymbol(s)
fnstate := dwarf.FnState{
Name: s.Name,
Importpath: myimportpath,
Info: absfn,
Filesym: filesym,
Absfn: absfn,
External: !s.Static(),
Scopes: scopes,
UseBASEntries: ctxt.UseBASEntries,
}
if err := dwarf.PutAbstractFunc(dwctxt, &fnstate); err != nil {
ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err)
}
}
// This table is designed to aid in the creation of references between
// DWARF subprogram DIEs.
//
// In most cases when one DWARF DIE has to refer to another DWARF DIE,
// the target of the reference has an LSym, which makes it easy to use
// the existing relocation mechanism. For DWARF inlined routine DIEs,
// however, the subprogram DIE has to refer to a child
// parameter/variable DIE of the abstract subprogram. This child DIE
// doesn't have an LSym, and also of interest is the fact that when
// DWARF generation is happening for inlined function F within caller
// G, it's possible that DWARF generation hasn't happened yet for F,
// so there is no way to know the offset of a child DIE within F's
// abstract function. Making matters more complex, each inlined
// instance of F may refer to a subset of the original F's variables
// (depending on what happens with optimization, some vars may be
// eliminated).
//
// The fixup table below helps overcome this hurdle. At the point
// where a parameter/variable reference is made (via a call to
// "ReferenceChildDIE"), a fixup record is generate that records
// the relocation that is targeting that child variable. At a later
// point when the abstract function DIE is emitted, there will be
// a call to "RegisterChildDIEOffsets", at which point the offsets
// needed to apply fixups are captured. Finally, once the parallel
// portion of the compilation is done, fixups can actually be applied
// during the "Finalize" method (this can't be done during the
// parallel portion of the compile due to the possibility of data
// races).
//
// This table is also used to record the "precursor" function node for
// each function that is the target of an inline -- child DIE references
// have to be made with respect to the original pre-optimization
// version of the function (to allow for the fact that each inlined
// body may be optimized differently).
type DwarfFixupTable struct {
ctxt *Link
mu sync.Mutex
symtab map[*LSym]int // maps abstract fn LSYM to index in svec
svec []symFixups
precursor map[*LSym]fnState // maps fn Lsym to precursor Node, absfn sym
}
type symFixups struct {
fixups []relFixup
doffsets []declOffset
inlIndex int32
defseen bool
}
type declOffset struct {
// Index of variable within DCL list of pre-optimization function
dclIdx int32
// Offset of var's child DIE with respect to containing subprogram DIE
offset int32
}
type relFixup struct {
refsym *LSym
relidx int32
dclidx int32
}
type fnState struct {
// precursor function (really *gc.Node)
precursor interface{}
// abstract function symbol
absfn *LSym
}
func NewDwarfFixupTable(ctxt *Link) *DwarfFixupTable {
return &DwarfFixupTable{
ctxt: ctxt,
symtab: make(map[*LSym]int),
precursor: make(map[*LSym]fnState),
}
}
func (ft *DwarfFixupTable) GetPrecursorFunc(s *LSym) interface{} {
if fnstate, found := ft.precursor[s]; found {
return fnstate.precursor
}
return nil
}
func (ft *DwarfFixupTable) SetPrecursorFunc(s *LSym, fn interface{}) {
if _, found := ft.precursor[s]; found {
ft.ctxt.Diag("internal error: DwarfFixupTable.SetPrecursorFunc double call on %v", s)
}
// initialize abstract function symbol now. This is done here so
// as to avoid data races later on during the parallel portion of
// the back end.
absfn := ft.ctxt.LookupDerived(s, dwarf.InfoPrefix+s.Name+dwarf.AbstractFuncSuffix)
absfn.Set(AttrDuplicateOK, true)
absfn.Type = objabi.SDWARFINFO
ft.ctxt.Data = append(ft.ctxt.Data, absfn)
// In the case of "late" inlining (inlines that happen during
// wrapper generation as opposed to the main inlining phase) it's
// possible that we didn't cache the abstract function sym for the
// text symbol -- do so now if needed. See issue 38068.
if s.Func != nil && s.Func.dwarfAbsFnSym == nil {
s.Func.dwarfAbsFnSym = absfn
}
ft.precursor[s] = fnState{precursor: fn, absfn: absfn}
}
// Make a note of a child DIE reference: relocation 'ridx' within symbol 's'
// is targeting child 'c' of DIE with symbol 'tgt'.
func (ft *DwarfFixupTable) ReferenceChildDIE(s *LSym, ridx int, tgt *LSym, dclidx int, inlIndex int) {
// Protect against concurrent access if multiple backend workers
ft.mu.Lock()
defer ft.mu.Unlock()
// Create entry for symbol if not already present.
idx, found := ft.symtab[tgt]
if !found {
ft.svec = append(ft.svec, symFixups{inlIndex: int32(inlIndex)})
idx = len(ft.svec) - 1
ft.symtab[tgt] = idx
}
// Do we have child DIE offsets available? If so, then apply them,
// otherwise create a fixup record.
sf := &ft.svec[idx]
if len(sf.doffsets) > 0 {
found := false
for _, do := range sf.doffsets {
if do.dclIdx == int32(dclidx) {
off := do.offset
s.R[ridx].Add += int64(off)
found = true
break
}
}
if !found {
ft.ctxt.Diag("internal error: DwarfFixupTable.ReferenceChildDIE unable to locate child DIE offset for dclIdx=%d src=%v tgt=%v", dclidx, s, tgt)
}
} else {
sf.fixups = append(sf.fixups, relFixup{s, int32(ridx), int32(dclidx)})
}
}
// Called once DWARF generation is complete for a given abstract function,
// whose children might have been referenced via a call above. Stores
// the offsets for any child DIEs (vars, params) so that they can be
// consumed later in on DwarfFixupTable.Finalize, which applies any
// outstanding fixups.
func (ft *DwarfFixupTable) RegisterChildDIEOffsets(s *LSym, vars []*dwarf.Var, coffsets []int32) {
// Length of these two slices should agree
if len(vars) != len(coffsets) {
ft.ctxt.Diag("internal error: RegisterChildDIEOffsets vars/offsets length mismatch")
return
}
// Generate the slice of declOffset's based in vars/coffsets
doffsets := make([]declOffset, len(coffsets))
for i := range coffsets {
doffsets[i].dclIdx = vars[i].ChildIndex
doffsets[i].offset = coffsets[i]
}
ft.mu.Lock()
defer ft.mu.Unlock()
// Store offsets for this symbol.
idx, found := ft.symtab[s]
if !found {
sf := symFixups{inlIndex: -1, defseen: true, doffsets: doffsets}
ft.svec = append(ft.svec, sf)
ft.symtab[s] = len(ft.svec) - 1
} else {
sf := &ft.svec[idx]
sf.doffsets = doffsets
sf.defseen = true
}
}
func (ft *DwarfFixupTable) processFixups(slot int, s *LSym) {
sf := &ft.svec[slot]
for _, f := range sf.fixups {
dfound := false
for _, doffset := range sf.doffsets {
if doffset.dclIdx == f.dclidx {
f.refsym.R[f.relidx].Add += int64(doffset.offset)
dfound = true
break
}
}
if !dfound {
ft.ctxt.Diag("internal error: DwarfFixupTable has orphaned fixup on %v targeting %v relidx=%d dclidx=%d", f.refsym, s, f.relidx, f.dclidx)
}
}
}
// return the LSym corresponding to the 'abstract subprogram' DWARF
// info entry for a function.
func (ft *DwarfFixupTable) AbsFuncDwarfSym(fnsym *LSym) *LSym {
// Protect against concurrent access if multiple backend workers
ft.mu.Lock()
defer ft.mu.Unlock()
if fnstate, found := ft.precursor[fnsym]; found {
return fnstate.absfn
}
ft.ctxt.Diag("internal error: AbsFuncDwarfSym requested for %v, not seen during inlining", fnsym)
return nil
}
// Called after all functions have been compiled; the main job of this
// function is to identify cases where there are outstanding fixups.
// This scenario crops up when we have references to variables of an
// inlined routine, but that routine is defined in some other package.
// This helper walks through and locate these fixups, then invokes a
// helper to create an abstract subprogram DIE for each one.
func (ft *DwarfFixupTable) Finalize(myimportpath string, trace bool) {
if trace {
ft.ctxt.Logf("DwarfFixupTable.Finalize invoked for %s\n", myimportpath)
}
// Collect up the keys from the precursor map, then sort the
// resulting list (don't want to rely on map ordering here).
fns := make([]*LSym, len(ft.precursor))
idx := 0
for fn := range ft.precursor {
fns[idx] = fn
idx++
}
sort.Sort(BySymName(fns))
// Should not be called during parallel portion of compilation.
if ft.ctxt.InParallel {
ft.ctxt.Diag("internal error: DwarfFixupTable.Finalize call during parallel backend")
}
// Generate any missing abstract functions.
for _, s := range fns {
absfn := ft.AbsFuncDwarfSym(s)
slot, found := ft.symtab[absfn]
if !found || !ft.svec[slot].defseen {
ft.ctxt.GenAbstractFunc(s)
}
}
// Apply fixups.
for _, s := range fns {
absfn := ft.AbsFuncDwarfSym(s)
slot, found := ft.symtab[absfn]
if !found {
ft.ctxt.Diag("internal error: DwarfFixupTable.Finalize orphan abstract function for %v", s)
} else {
ft.processFixups(slot, s)
}
}
}
type BySymName []*LSym
func (s BySymName) Len() int { return len(s) }
func (s BySymName) Less(i, j int) bool { return s[i].Name < s[j].Name }
func (s BySymName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }