mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: use per-goroutine sequence numbers in tracer
Currently tracer uses global sequencer and it introduces significant slowdown on parallel machines (up to 10x). Replace the global sequencer with per-goroutine sequencer. If we assign per-goroutine sequence numbers to only 3 types of events (start, unblock and syscall exit), it is enough to restore consistent partial ordering of all events. Even these events don't need sequence numbers all the time (if goroutine starts on the same P where it was unblocked, then start does not need sequence number). The burden of restoring the order is put on trace parser. Details of the algorithm are described in the comments. On http benchmark with GOMAXPROCS=48: no tracing: 5026 ns/op tracing: 27803 ns/op (+453%) with this change: 6369 ns/op (+26%, mostly for traceback) Also trace size is reduced by ~22%. Average event size before: 4.63 bytes/event, after: 3.62 bytes/event. Besides running trace tests, I've also tested with manually broken cputicks (random skew for each event, per-P skew and episodic random skew). In all cases broken timestamps were detected and no test failures. Change-Id: I078bde421ccc386a66f6c2051ab207bcd5613efa Reviewed-on: https://go-review.googlesource.com/21512 Run-TryBot: Dmitry Vyukov <dvyukov@google.com> Reviewed-by: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
ba966f5d89
commit
a3703618ea
6 changed files with 479 additions and 184 deletions
|
|
@ -11,7 +11,6 @@ import (
|
|||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
|
@ -135,7 +134,12 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
|
|||
}
|
||||
off += n
|
||||
typ := buf[0] << 2 >> 2
|
||||
narg := buf[0] >> 6
|
||||
narg := buf[0]>>6 + 1
|
||||
inlineArgs := byte(4)
|
||||
if ver < 1007 {
|
||||
narg++
|
||||
inlineArgs++
|
||||
}
|
||||
if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver {
|
||||
err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0)
|
||||
return
|
||||
|
|
@ -180,8 +184,8 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
|
|||
continue
|
||||
}
|
||||
ev := rawEvent{typ: typ, off: off0}
|
||||
if narg < 3 {
|
||||
for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
|
||||
if narg < inlineArgs {
|
||||
for i := 0; i < int(narg); i++ {
|
||||
var v uint64
|
||||
v, off, err = readVal(r, off)
|
||||
if err != nil {
|
||||
|
|
@ -191,7 +195,7 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
|
|||
ev.args = append(ev.args, v)
|
||||
}
|
||||
} else {
|
||||
// If narg == 3, the first value is length of the event in bytes.
|
||||
// More than inlineArgs args, the first value is length of the event in bytes.
|
||||
var v uint64
|
||||
v, off, err = readVal(r, off)
|
||||
if err != nil {
|
||||
|
|
@ -250,34 +254,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
|
|||
var lastP int
|
||||
lastGs := make(map[int]uint64) // last goroutine running on P
|
||||
stacks = make(map[uint64][]*Frame)
|
||||
batches := make(map[int][]*Event) // events by P
|
||||
for _, raw := range rawEvents {
|
||||
desc := EventDescriptions[raw.typ]
|
||||
if desc.Name == "" {
|
||||
err = fmt.Errorf("missing description for event type %v", raw.typ)
|
||||
return
|
||||
}
|
||||
if raw.typ != EvStack {
|
||||
narg := len(desc.Args)
|
||||
if desc.Stack {
|
||||
narg++
|
||||
}
|
||||
if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
|
||||
narg++ // sequence number
|
||||
narg++ // timestamp
|
||||
}
|
||||
if len(raw.args) != narg {
|
||||
err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
|
||||
desc.Name, raw.off, narg, len(raw.args))
|
||||
return
|
||||
}
|
||||
narg := argNum(raw, ver)
|
||||
if len(raw.args) != narg {
|
||||
err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
|
||||
desc.Name, raw.off, narg, len(raw.args))
|
||||
return
|
||||
}
|
||||
switch raw.typ {
|
||||
case EvBatch:
|
||||
lastGs[lastP] = lastG
|
||||
lastP = int(raw.args[0])
|
||||
lastG = lastGs[lastP]
|
||||
lastSeq = int64(raw.args[1])
|
||||
lastTs = int64(raw.args[2])
|
||||
if ver < 1007 {
|
||||
lastSeq = int64(raw.args[1])
|
||||
lastTs = int64(raw.args[2])
|
||||
} else {
|
||||
lastTs = int64(raw.args[1])
|
||||
}
|
||||
case EvFrequency:
|
||||
ticksPerSec = int64(raw.args[0])
|
||||
if ticksPerSec <= 0 {
|
||||
|
|
@ -328,18 +328,26 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
|
|||
}
|
||||
default:
|
||||
e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
|
||||
e.seq = lastSeq + int64(raw.args[0])
|
||||
e.Ts = lastTs + int64(raw.args[1])
|
||||
lastSeq = e.seq
|
||||
lastTs = e.Ts
|
||||
for i := range desc.Args {
|
||||
e.Args[i] = raw.args[i+2]
|
||||
var argOffset int
|
||||
if ver < 1007 {
|
||||
e.seq = lastSeq + int64(raw.args[0])
|
||||
e.Ts = lastTs + int64(raw.args[1])
|
||||
lastSeq = e.seq
|
||||
argOffset = 2
|
||||
} else {
|
||||
e.Ts = lastTs + int64(raw.args[0])
|
||||
argOffset = 1
|
||||
}
|
||||
if desc.Stack {
|
||||
e.StkID = raw.args[len(desc.Args)+2]
|
||||
lastTs = e.Ts
|
||||
for i := argOffset; i < narg; i++ {
|
||||
if i == narg-1 && desc.Stack {
|
||||
e.StkID = raw.args[i]
|
||||
} else {
|
||||
e.Args[i-argOffset] = raw.args[i]
|
||||
}
|
||||
}
|
||||
switch raw.typ {
|
||||
case EvGoStart:
|
||||
case EvGoStart, EvGoStartLocal:
|
||||
lastG = e.Args[0]
|
||||
e.G = lastG
|
||||
case EvGCStart, EvGCDone, EvGCScanStart, EvGCScanDone:
|
||||
|
|
@ -349,28 +357,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
|
|||
EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
|
||||
EvGoSysBlock:
|
||||
lastG = 0
|
||||
case EvGoSysExit:
|
||||
// EvGoSysExit emission is delayed until the thread has a P.
|
||||
// Give it the real sequence number and time stamp.
|
||||
e.seq = int64(e.Args[1])
|
||||
if e.Args[2] != 0 {
|
||||
e.Ts = int64(e.Args[2])
|
||||
}
|
||||
case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
|
||||
e.G = e.Args[0]
|
||||
}
|
||||
events = append(events, e)
|
||||
batches[lastP] = append(batches[lastP], e)
|
||||
}
|
||||
}
|
||||
if len(events) == 0 {
|
||||
if len(batches) == 0 {
|
||||
err = fmt.Errorf("trace is empty")
|
||||
return
|
||||
}
|
||||
|
||||
// Sort by sequence number and translate cpu ticks to real time.
|
||||
sort.Sort(eventList(events))
|
||||
if ticksPerSec == 0 {
|
||||
err = fmt.Errorf("no EvFrequency event")
|
||||
return
|
||||
}
|
||||
if ver < 1007 {
|
||||
events, err = order1005(batches)
|
||||
} else {
|
||||
events, err = order1007(batches)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Translate cpu ticks to real time.
|
||||
minTs := events[0].Ts
|
||||
// Use floating point to avoid integer overflows.
|
||||
freq := 1e9 / float64(ticksPerSec)
|
||||
|
|
@ -382,7 +392,6 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
|
|||
}
|
||||
if ev.Type == EvGoSysExit {
|
||||
ev.P = SyscallP
|
||||
ev.G = ev.Args[0]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -543,19 +552,15 @@ func postProcessTrace(ver int, events []*Event) error {
|
|||
p.evSweep.Link = ev
|
||||
p.evSweep = nil
|
||||
case EvGoWaiting:
|
||||
g1 := gs[ev.Args[0]]
|
||||
if g1.state != gRunnable {
|
||||
return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
|
||||
if g.state != gRunnable {
|
||||
return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
|
||||
}
|
||||
g1.state = gWaiting
|
||||
gs[ev.Args[0]] = g1
|
||||
g.state = gWaiting
|
||||
case EvGoInSyscall:
|
||||
g1 := gs[ev.Args[0]]
|
||||
if g1.state != gRunnable {
|
||||
return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
|
||||
if g.state != gRunnable {
|
||||
return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
|
||||
}
|
||||
g1.state = gWaiting
|
||||
gs[ev.Args[0]] = g1
|
||||
g.state = gWaiting
|
||||
case EvGoCreate:
|
||||
if err := checkRunning(p, g, ev, true); err != nil {
|
||||
return err
|
||||
|
|
@ -666,18 +671,6 @@ func postProcessTrace(ver int, events []*Event) error {
|
|||
// TODO(dvyukov): restore stacks for EvGoStart events.
|
||||
// TODO(dvyukov): test that all EvGoStart events has non-nil Link.
|
||||
|
||||
// Last, after all the other consistency checks,
|
||||
// make sure time stamps respect sequence numbers.
|
||||
// The tests will skip (not fail) the test case if they see this error,
|
||||
// so check everything else that could possibly be wrong first.
|
||||
lastTs := int64(0)
|
||||
for _, ev := range events {
|
||||
if ev.Ts < lastTs {
|
||||
return ErrTimeOrder
|
||||
}
|
||||
lastTs = ev.Ts
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -773,32 +766,53 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
|
|||
return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0)
|
||||
}
|
||||
|
||||
type eventList []*Event
|
||||
|
||||
func (l eventList) Len() int {
|
||||
return len(l)
|
||||
}
|
||||
|
||||
func (l eventList) Less(i, j int) bool {
|
||||
return l[i].seq < l[j].seq
|
||||
}
|
||||
|
||||
func (l eventList) Swap(i, j int) {
|
||||
l[i], l[j] = l[j], l[i]
|
||||
}
|
||||
|
||||
// Print dumps events to stdout. For debugging.
|
||||
func Print(events []*Event) {
|
||||
for _, ev := range events {
|
||||
desc := EventDescriptions[ev.Type]
|
||||
fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
|
||||
for i, a := range desc.Args {
|
||||
fmt.Printf(" %v=%v", a, ev.Args[i])
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
PrintEvent(ev)
|
||||
}
|
||||
}
|
||||
|
||||
// PrintEvent dumps the event to stdout. For debugging.
|
||||
func PrintEvent(ev *Event) {
|
||||
desc := EventDescriptions[ev.Type]
|
||||
fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
|
||||
for i, a := range desc.Args {
|
||||
fmt.Printf(" %v=%v", a, ev.Args[i])
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
|
||||
// argNum returns total number of args for the event accounting for timestamps,
|
||||
// sequence numbers and differences between trace format versions.
|
||||
func argNum(raw rawEvent, ver int) int {
|
||||
desc := EventDescriptions[raw.typ]
|
||||
if raw.typ == EvStack {
|
||||
return len(raw.args)
|
||||
}
|
||||
narg := len(desc.Args)
|
||||
if desc.Stack {
|
||||
narg++
|
||||
}
|
||||
switch raw.typ {
|
||||
case EvBatch, EvFrequency, EvTimerGoroutine:
|
||||
if ver < 1007 {
|
||||
narg++ // there was an unused arg before 1.7
|
||||
}
|
||||
case EvGCStart, EvGoStart, EvGoUnblock:
|
||||
if ver < 1007 {
|
||||
narg-- // 1.7 added an additional seq arg
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
narg++ // timestamp
|
||||
if ver < 1007 {
|
||||
narg++ // sequence
|
||||
}
|
||||
}
|
||||
return narg
|
||||
}
|
||||
|
||||
// Event types in the trace.
|
||||
// Verbatim copy from src/runtime/trace.go.
|
||||
const (
|
||||
|
|
@ -809,21 +823,21 @@ const (
|
|||
EvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
|
||||
EvProcStart = 5 // start of P [timestamp, thread id]
|
||||
EvProcStop = 6 // stop of P [timestamp]
|
||||
EvGCStart = 7 // GC start [timestamp, stack id]
|
||||
EvGCStart = 7 // GC start [timestamp, seq, stack id]
|
||||
EvGCDone = 8 // GC done [timestamp]
|
||||
EvGCScanStart = 9 // GC scan start [timestamp]
|
||||
EvGCScanDone = 10 // GC scan done [timestamp]
|
||||
EvGCSweepStart = 11 // GC sweep start [timestamp, stack id]
|
||||
EvGCSweepDone = 12 // GC sweep done [timestamp]
|
||||
EvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
|
||||
EvGoStart = 14 // goroutine starts running [timestamp, goroutine id]
|
||||
EvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq]
|
||||
EvGoEnd = 15 // goroutine ends [timestamp]
|
||||
EvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack]
|
||||
EvGoSched = 17 // goroutine calls Gosched [timestamp, stack]
|
||||
EvGoPreempt = 18 // goroutine is preempted [timestamp, stack]
|
||||
EvGoSleep = 19 // goroutine calls Sleep [timestamp, stack]
|
||||
EvGoBlock = 20 // goroutine blocks [timestamp, stack]
|
||||
EvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, stack]
|
||||
EvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
|
||||
EvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack]
|
||||
EvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack]
|
||||
EvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack]
|
||||
|
|
@ -831,7 +845,7 @@ const (
|
|||
EvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack]
|
||||
EvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack]
|
||||
EvGoSysCall = 28 // syscall enter [timestamp, stack]
|
||||
EvGoSysExit = 29 // syscall exit [timestamp, goroutine id, real timestamp]
|
||||
EvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
|
||||
EvGoSysBlock = 30 // syscall blocks [timestamp]
|
||||
EvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
|
||||
EvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
|
||||
|
|
@ -840,7 +854,10 @@ const (
|
|||
EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
|
||||
EvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
|
||||
EvString = 37 // string dictionary entry [ID, length, string]
|
||||
EvCount = 38
|
||||
EvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
|
||||
EvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
|
||||
EvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
|
||||
EvCount = 41
|
||||
)
|
||||
|
||||
var EventDescriptions = [EvCount]struct {
|
||||
|
|
@ -850,27 +867,27 @@ var EventDescriptions = [EvCount]struct {
|
|||
Args []string
|
||||
}{
|
||||
EvNone: {"None", 1005, false, []string{}},
|
||||
EvBatch: {"Batch", 1005, false, []string{"p", "seq", "ticks"}},
|
||||
EvFrequency: {"Frequency", 1005, false, []string{"freq", "unused"}},
|
||||
EvBatch: {"Batch", 1005, false, []string{"p", "ticks"}}, // in 1.5 format it was {"p", "seq", "ticks"}
|
||||
EvFrequency: {"Frequency", 1005, false, []string{"freq"}}, // in 1.5 format it was {"freq", "unused"}
|
||||
EvStack: {"Stack", 1005, false, []string{"id", "siz"}},
|
||||
EvGomaxprocs: {"Gomaxprocs", 1005, true, []string{"procs"}},
|
||||
EvProcStart: {"ProcStart", 1005, false, []string{"thread"}},
|
||||
EvProcStop: {"ProcStop", 1005, false, []string{}},
|
||||
EvGCStart: {"GCStart", 1005, true, []string{}},
|
||||
EvGCStart: {"GCStart", 1005, true, []string{"seq"}}, // in 1.5 format it was {}
|
||||
EvGCDone: {"GCDone", 1005, false, []string{}},
|
||||
EvGCScanStart: {"GCScanStart", 1005, false, []string{}},
|
||||
EvGCScanDone: {"GCScanDone", 1005, false, []string{}},
|
||||
EvGCSweepStart: {"GCSweepStart", 1005, true, []string{}},
|
||||
EvGCSweepDone: {"GCSweepDone", 1005, false, []string{}},
|
||||
EvGoCreate: {"GoCreate", 1005, true, []string{"g", "stack"}},
|
||||
EvGoStart: {"GoStart", 1005, false, []string{"g"}},
|
||||
EvGoStart: {"GoStart", 1005, false, []string{"g", "seq"}}, // in 1.5 format it was {"g"}
|
||||
EvGoEnd: {"GoEnd", 1005, false, []string{}},
|
||||
EvGoStop: {"GoStop", 1005, true, []string{}},
|
||||
EvGoSched: {"GoSched", 1005, true, []string{}},
|
||||
EvGoPreempt: {"GoPreempt", 1005, true, []string{}},
|
||||
EvGoSleep: {"GoSleep", 1005, true, []string{}},
|
||||
EvGoBlock: {"GoBlock", 1005, true, []string{}},
|
||||
EvGoUnblock: {"GoUnblock", 1005, true, []string{"g"}},
|
||||
EvGoUnblock: {"GoUnblock", 1005, true, []string{"g", "seq"}}, // in 1.5 format it was {"g"}
|
||||
EvGoBlockSend: {"GoBlockSend", 1005, true, []string{}},
|
||||
EvGoBlockRecv: {"GoBlockRecv", 1005, true, []string{}},
|
||||
EvGoBlockSelect: {"GoBlockSelect", 1005, true, []string{}},
|
||||
|
|
@ -884,7 +901,10 @@ var EventDescriptions = [EvCount]struct {
|
|||
EvGoInSyscall: {"GoInSyscall", 1005, false, []string{"g"}},
|
||||
EvHeapAlloc: {"HeapAlloc", 1005, false, []string{"mem"}},
|
||||
EvNextGC: {"NextGC", 1005, false, []string{"mem"}},
|
||||
EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g", "unused"}},
|
||||
EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g"}}, // in 1.5 format it was {"g", "unused"}
|
||||
EvFutileWakeup: {"FutileWakeup", 1005, false, []string{}},
|
||||
EvString: {"String", 1007, false, []string{}},
|
||||
EvGoStartLocal: {"GoStartLocal", 1007, false, []string{"g"}},
|
||||
EvGoUnblockLocal: {"GoUnblockLocal", 1007, true, []string{"g"}},
|
||||
EvGoSysExitLocal: {"GoSysExitLocal", 1007, false, []string{"g", "ts"}},
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue