// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package obj

import (
	"bytes"
	"cmd/internal/objabi"
	"fmt"
	"strings"
)

const REG_NONE = 0
// Line returns a string containing the filename and line number for p.
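// The result typically looks like "hist.go:35" (an illustrative example;
// the exact form is produced by src.Pos.Format).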
func (p *Prog) Line() string {
return p.Ctxt.OutermostPos(p.Pos).Format(false)
}
// LineNumber returns a string containing the line number for p's position.
func (p *Prog) LineNumber() string {
pos := p.Ctxt.OutermostPos(p.Pos)
if !pos.IsKnown() {
return "?"
}
return fmt.Sprintf("%d", pos.Line())
}
// FileName returns a string containing the filename for p's position.
func (p *Prog) FileName() string {
// TODO LineNumber and FileName cases don't handle full generality of positions,
// but because these are currently used only for GOSSAFUNC debugging output, that
// is okay. The intent is that "LineNumber()" yields the rapidly varying part,
// while "FileName()" yields the longer and slightly more constant material.
pos := p.Ctxt.OutermostPos(p.Pos)
if !pos.IsKnown() {
return "<unknown file name>"
}
return pos.Filename()
}
var armCondCode = []string{
".EQ",
".NE",
".CS",
".CC",
".MI",
".PL",
".VS",
".VC",
".HI",
".LS",
".GE",
".LT",
".GT",
".LE",
"",
".NV",
}
/* ARM scond byte */
const (
C_SCOND = (1 << 4) - 1
C_SBIT = 1 << 4
C_PBIT = 1 << 5
C_WBIT = 1 << 6
C_FBIT = 1 << 7
C_UBIT = 1 << 7
C_SCOND_XOR = 14
)
// CConv formats ARM condition codes.
func CConv(s uint8) string {
if s == 0 {
return ""
}
sc := armCondCode[(s&C_SCOND)^C_SCOND_XOR]
if s&C_SBIT != 0 {
sc += ".S"
}
if s&C_PBIT != 0 {
sc += ".P"
}
if s&C_WBIT != 0 {
sc += ".W"
}
if s&C_UBIT != 0 { /* ambiguous with FBIT */
sc += ".U"
}
return sc
}
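
// For example, the EQ condition is stored as 0xe in the scond byte, so the
// common AL (always) condition encodes as 0 and prints as nothing:
//
//	CConv(0)            // ""
//	CConv(0xe)          // ".EQ"
//	CConv(0xe | C_SBIT) // ".EQ.S"
//
// (Values derived from the armCondCode table and masks above.)
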
func (p *Prog) String() string {
if p == nil {
return "<nil Prog>"
}
if p.Ctxt == nil {
return "<Prog without ctxt>"
}
return fmt.Sprintf("%.5d (%v)\t%s", p.Pc, p.Line(), p.InstructionString())
}
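
// For example (illustrative), a Prog at Pc 16 prints as:
//
//	00016 (hist.go:36)	LEAQ	"".autotmp_31-640(SP), DI
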
// InstructionString returns a string representation of the instruction without preceding
// program counter or file and line number.
func (p *Prog) InstructionString() string {
if p == nil {
return "<nil Prog>"
}
if p.Ctxt == nil {
return "<Prog without ctxt>"
}
sc := CConv(p.Scond)
var buf bytes.Buffer
fmt.Fprintf(&buf, "%v%s", p.As, sc)
sep := "\t"
if p.From.Type != TYPE_NONE {
fmt.Fprintf(&buf, "%s%v", sep, Dconv(p, &p.From))
sep = ", "
}
if p.Reg != REG_NONE {
// Should not happen but might as well show it if it does.
fmt.Fprintf(&buf, "%s%v", sep, Rconv(int(p.Reg)))
sep = ", "
}
for i := range p.RestArgs {
fmt.Fprintf(&buf, "%s%v", sep, Dconv(p, &p.RestArgs[i]))
sep = ", "
}
if p.As == ATEXT {
// If there are attributes, print them. Otherwise, skip the comma.
// In short, print one of these two:
// TEXT foo(SB), DUPOK|NOSPLIT, $0
// TEXT foo(SB), $0
s := p.From.Sym.Attribute.TextAttrString()
if s != "" {
fmt.Fprintf(&buf, "%s%s", sep, s)
sep = ", "
}
}
if p.To.Type != TYPE_NONE {
fmt.Fprintf(&buf, "%s%v", sep, Dconv(p, &p.To))
}
if p.RegTo2 != REG_NONE {
fmt.Fprintf(&buf, "%s%v", sep, Rconv(int(p.RegTo2)))
}
return buf.String()
}
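
// For example (illustrative): a two-operand instruction renders as
// "MOVQ	AX, BX", and an ATEXT Prog with attributes and a text size as
// "TEXT	foo(SB), DUPOK|NOSPLIT, $0-16".
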
func (ctxt *Link) NewProg() *Prog {
p := new(Prog)
p.Ctxt = ctxt
return p
}
func (ctxt *Link) CanReuseProgs() bool {
return !ctxt.Debugasm
}
func (ctxt *Link) Dconv(a *Addr) string {
return Dconv(nil, a)
}
func Dconv(p *Prog, a *Addr) string {
var str string
switch a.Type {
default:
str = fmt.Sprintf("type=%d", a.Type)
case TYPE_NONE:
str = ""
if a.Name != NAME_NONE || a.Reg != 0 || a.Sym != nil {
str = fmt.Sprintf("%v(%v)(NONE)", Mconv(a), Rconv(int(a.Reg)))
}
case TYPE_REG:
// TODO(rsc): This special case is for x86 instructions like
// PINSRQ CX,$1,X6
// where the $1 is included in the p->to Addr.
// Move into a new field.
if a.Offset != 0 {
str = fmt.Sprintf("$%d,%v", a.Offset, Rconv(int(a.Reg)))
break
}
str = Rconv(int(a.Reg))
if a.Name != NAME_NONE || a.Sym != nil {
str = fmt.Sprintf("%v(%v)(REG)", Mconv(a), Rconv(int(a.Reg)))
}
case TYPE_BRANCH:
if a.Sym != nil {
str = fmt.Sprintf("%s(SB)", a.Sym.Name)
} else if p != nil && p.Pcond != nil {
str = fmt.Sprint(p.Pcond.Pc)
} else if a.Val != nil {
str = fmt.Sprint(a.Val.(*Prog).Pc)
} else {
str = fmt.Sprintf("%d(PC)", a.Offset)
}
case TYPE_INDIR:
str = fmt.Sprintf("*%s", Mconv(a))
case TYPE_MEM:
str = Mconv(a)
if a.Index != REG_NONE {
str += fmt.Sprintf("(%v*%d)", Rconv(int(a.Index)), int(a.Scale))
}
case TYPE_CONST:
if a.Reg != 0 {
str = fmt.Sprintf("$%v(%v)", Mconv(a), Rconv(int(a.Reg)))
} else {
str = fmt.Sprintf("$%v", Mconv(a))
}
case TYPE_TEXTSIZE:
if a.Val.(int32) == objabi.ArgsSizeUnknown {
str = fmt.Sprintf("$%d", a.Offset)
} else {
str = fmt.Sprintf("$%d-%d", a.Offset, a.Val.(int32))
}
case TYPE_FCONST:
str = fmt.Sprintf("%.17g", a.Val.(float64))
// Make sure 1 prints as 1.0
if !strings.ContainsAny(str, ".e") {
str += ".0"
}
str = fmt.Sprintf("$(%s)", str)
case TYPE_SCONST:
str = fmt.Sprintf("$%q", a.Val.(string))
case TYPE_ADDR:
str = fmt.Sprintf("$%s", Mconv(a))
case TYPE_SHIFT:
v := int(a.Offset)
ops := "<<>>->@>"
switch objabi.GOARCH {
case "arm":
op := ops[((v>>5)&3)<<1:]
if v&(1<<4) != 0 {
str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
} else {
str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
}
if a.Reg != 0 {
str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
}
case "arm64":
op := ops[((v>>22)&3)<<1:]
str = fmt.Sprintf("R%d%c%c%d", (v>>16)&31, op[0], op[1], (v>>10)&63)
default:
panic("TYPE_SHIFT is not supported on " + objabi.GOARCH)
}
case TYPE_REGREG:
str = fmt.Sprintf("(%v, %v)", Rconv(int(a.Reg)), Rconv(int(a.Offset)))
case TYPE_REGREG2:
str = fmt.Sprintf("%v, %v", Rconv(int(a.Offset)), Rconv(int(a.Reg)))
case TYPE_REGLIST:
str = regListConv(int(a.Offset))
}
return str
}
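
// For example (illustrative): a TYPE_CONST Addr with Offset 7 renders as
// "$7", a TYPE_ADDR of external symbol x as "$x(SB)", and a TYPE_MEM with a
// base register and scaled index as "(AX)(CX*4)"; the register names here
// assume an amd64-style Rconv.
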
func Mconv(a *Addr) string {
var str string
switch a.Name {
default:
str = fmt.Sprintf("name=%d", a.Name)
case NAME_NONE:
switch {
case a.Reg == REG_NONE:
str = fmt.Sprint(a.Offset)
case a.Offset == 0:
str = fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
case a.Offset != 0:
str = fmt.Sprintf("%d(%v)", a.Offset, Rconv(int(a.Reg)))
}
// Note: a.Reg == REG_NONE encodes the default base register for the NAME_ type.
case NAME_EXTERN:
reg := "SB"
if a.Reg != REG_NONE {
reg = Rconv(int(a.Reg))
}
if a.Sym != nil {
str = fmt.Sprintf("%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg)
} else {
str = fmt.Sprintf("%s(%s)", offConv(a.Offset), reg)
}
case NAME_GOTREF:
reg := "SB"
if a.Reg != REG_NONE {
reg = Rconv(int(a.Reg))
}
if a.Sym != nil {
str = fmt.Sprintf("%s%s@GOT(%s)", a.Sym.Name, offConv(a.Offset), reg)
} else {
str = fmt.Sprintf("%s@GOT(%s)", offConv(a.Offset), reg)
}
case NAME_STATIC:
reg := "SB"
if a.Reg != REG_NONE {
reg = Rconv(int(a.Reg))
}
if a.Sym != nil {
str = fmt.Sprintf("%s<>%s(%s)", a.Sym.Name, offConv(a.Offset), reg)
} else {
str = fmt.Sprintf("<>%s(%s)", offConv(a.Offset), reg)
}
case NAME_AUTO:
reg := "SP"
if a.Reg != REG_NONE {
reg = Rconv(int(a.Reg))
}
if a.Sym != nil {
str = fmt.Sprintf("%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg)
} else {
str = fmt.Sprintf("%s(%s)", offConv(a.Offset), reg)
}
case NAME_PARAM:
reg := "FP"
if a.Reg != REG_NONE {
reg = Rconv(int(a.Reg))
}
if a.Sym != nil {
str = fmt.Sprintf("%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg)
} else {
str = fmt.Sprintf("%s(%s)", offConv(a.Offset), reg)
}
}
return str
}
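
// For example, with Sym "x" and Offset 8: NAME_EXTERN prints as "x+8(SB)",
// NAME_STATIC as "x<>+8(SB)", NAME_AUTO as "x+8(SP)", and NAME_PARAM as
// "x+8(FP)", matching the assembler's operand syntax.
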
func offConv(off int64) string {
if off == 0 {
return ""
}
return fmt.Sprintf("%+d", off)
}
type regSet struct {
lo int
hi int
Rconv func(int) string
}
// Few enough architectures that a linear scan is fastest.
// Not even worth sorting.
var regSpace []regSet
/*
Each architecture defines a register space as a unique
integer range.
Here is the list of architectures and the base of their register spaces.
*/
const (
// Because of masking operations in the encodings, each register
// space should start at 0 modulo some power of 2.
RBase386 = 1 * 1024
RBaseAMD64 = 2 * 1024
RBaseARM = 3 * 1024
RBasePPC64 = 4 * 1024 // range [4k, 8k)
RBaseARM64 = 8 * 1024 // range [8k, 13k)
RBaseMIPS = 13 * 1024 // range [13k, 14k)
RBaseS390X = 14 * 1024 // range [14k, 15k)
)
// RegisterRegister binds a pretty-printer (Rconv) for register
// numbers to a given register number range. Lo is inclusive,
// hi exclusive (valid registers are lo through hi-1).
func RegisterRegister(lo, hi int, Rconv func(int) string) {
regSpace = append(regSpace, regSet{lo, hi, Rconv})
}
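
// Each architecture back end is expected to call RegisterRegister from an
// init function, along the lines of this sketch (bounds hypothetical):
//
//	func init() {
//		obj.RegisterRegister(obj.RBaseARM, obj.RBaseARM+1024, rconv)
//	}
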
func Rconv(reg int) string {
if reg == REG_NONE {
return "NONE"
}
for i := range regSpace {
rs := &regSpace[i]
if rs.lo <= reg && reg < rs.hi {
return rs.Rconv(reg)
}
}
return fmt.Sprintf("R???%d", reg)
}
func regListConv(list int) string {
str := ""
for i := 0; i < 16; i++ { // TODO: 16 is ARM-specific.
if list&(1<<uint(i)) != 0 {
if str == "" {
str += "["
} else {
str += ","
}
// This is ARM-specific; R10 is g.
if i == 10 {
str += "g"
} else {
str += fmt.Sprintf("R%d", i)
}
}
}
str += "]"
return str
}
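
// For example, regListConv(1<<0|1<<1|1<<10) returns "[R0,R1,g]".
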
type opSet struct {
lo As
names []string
}
// Not even worth sorting
var aSpace []opSet
// RegisterOpcode binds a list of instruction names
// to a given instruction number range.
func RegisterOpcode(lo As, Anames []string) {
if len(Anames) > AllowedOpCodes {
panic(fmt.Sprintf("too many instructions, have %d max %d", len(Anames), AllowedOpCodes))
}
aSpace = append(aSpace, opSet{lo, Anames})
}
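
// A back end registers its opcode names the same way at init time; a sketch
// (identifiers hypothetical):
//
//	obj.RegisterOpcode(obj.ABaseARM, Anames)
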
func (a As) String() string {
if 0 <= a && int(a) < len(Anames) {
return Anames[a]
}
for i := range aSpace {
as := &aSpace[i]
if as.lo <= a && int(a-as.lo) < len(as.names) {
return as.names[a-as.lo]
}
}
return fmt.Sprintf("A???%d", a)
}
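
// For example, ACALL.String() returns "CALL" via the Anames table below,
// while a value outside every registered range prints as "A???" followed by
// its numeric value.
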
var Anames = []string{
"XXX",
"CALL",
"DUFFCOPY",
"DUFFZERO",
"END",
"FUNCDATA",
"JMP",
"NOP",
"PCDATA",
"RET",
"TEXT",
"UNDEF",
}
func Bool2int(b bool) int {
// The compiler currently only optimizes this form.
// See issue 6011.
var i int
if b {
i = 1
} else {
i = 0
}
return i
}