go/src/runtime/pprof/pprof_test.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !js
// +build !js

package pprof

import (
	"bytes"
	"context"
	"fmt"
	"internal/profile"
	"internal/testenv"
	"io"
	"math"
	"math/big"
	"os"
	"os/exec"
	"regexp"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
	_ "unsafe"
)

func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
	// We only need to get one 100 Hz clock tick, so we've got
	// a large safety buffer.
	// But do at least 500 iterations (which should take about 100ms),
	// otherwise TestCPUProfileMultithreaded can fail if only one
	// thread is scheduled during the testing period.
	t0 := time.Now()
	accum := *y
	for i := 0; i < 500 || time.Since(t0) < dur; i++ {
		accum = f(accum)
	}
	*y = accum
}
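
// salt1 and salt2 are package-level sinks for the hog results; cpuHogger
// stores the accumulated value back through *y, giving the hot loops an
// observable side effect so they cannot be optimized away.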
var (
	salt1 = 0
	salt2 = 0
)

// The actual CPU hogging function.
// Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime.

func cpuHog1(x int) int {
	return cpuHog0(x, 1e5)
}
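
// cpuHog0 is the hot loop shared by cpuHog1 and inlinedCallee; n controls
// how many iterations it runs.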
func cpuHog0(x, n int) int {
	foo := x
	for i := 0; i < n; i++ {
		if foo > 0 {
			foo *= foo
		} else {
			foo *= foo + 1
		}
	}
	return foo
}
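
// cpuHog2 is a near-copy of cpuHog1. Perturbing foo by +2 instead of +1
// keeps the two bodies from being identical, so samples can be attributed
// to each hog separately.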
func cpuHog2(x int) int {
	foo := x
	for i := 0; i < 1e5; i++ {
		if foo > 0 {
			foo *= foo
		} else {
			foo *= foo + 2
		}
	}
	return foo
}

// Return a list of functions that we don't want to ever appear in CPU
// profiles. For gccgo, that list includes the sigprof handler itself.
func avoidFunctions() []string {
	if runtime.Compiler == "gccgo" {
		return []string{"runtime.sigprof"}
	}
	return nil
}

func TestCPUProfile(t *testing.T) {
	testCPUProfile(t, stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions(), func(dur time.Duration) {
		cpuHogger(cpuHog1, &salt1, dur)
	})
}

func TestCPUProfileMultithreaded(t *testing.T) {
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
	testCPUProfile(t, stackContains, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, avoidFunctions(), func(dur time.Duration) {
		c := make(chan int)
		go func() {
			cpuHogger(cpuHog1, &salt1, dur)
			c <- 1
		}()
		cpuHogger(cpuHog2, &salt2, dur)
		<-c
	})
}

// containsInlinedCall reports whether the function body for the function f is
// known to contain an inlined function call within the first maxBytes bytes.
func containsInlinedCall(f interface{}, maxBytes int) bool {
	_, found := findInlinedCall(f, maxBytes)
	return found
}

// findInlinedCall returns the PC of an inlined function call within
// the function body for the function f, if any.
func findInlinedCall(f interface{}, maxBytes int) (pc uint64, found bool) {
	fFunc := runtime.FuncForPC(uintptr(funcPC(f)))
	if fFunc == nil || fFunc.Entry() == 0 {
		panic("failed to locate function entry")
	}
	for offset := 0; offset < maxBytes; offset++ {
		innerPC := fFunc.Entry() + uintptr(offset)
		inner := runtime.FuncForPC(innerPC)
		if inner == nil {
			// No function known for this PC value.
			// It might simply be misaligned, so keep searching.
			continue
		}
		if inner.Entry() != fFunc.Entry() {
			// Scanned past f and didn't find any inlined functions.
			break
		}
		if inner.Name() != fFunc.Name() {
			// This PC has f as its entry-point, but is not f. Therefore, it must be a
			// function inlined into f.
			return uint64(innerPC), true
		}
	}
	return 0, false
}

func TestCPUProfileInlining(t *testing.T) {
	if !containsInlinedCall(inlinedCaller, 4<<10) {
		t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.")
	}
	p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
		cpuHogger(inlinedCaller, &salt1, dur)
	})

	// Check that inlined function locations are encoded correctly.
	// inlinedCallee and inlinedCaller should appear in a single Location.
	for _, loc := range p.Location {
		hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
		for _, line := range loc.Line {
			if line.Function.Name == "runtime/pprof.inlinedCallee" {
				hasInlinedCallee = true
			}
			if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
				hasInlinedCallerAfterInlinedCallee = true
			}
		}
		if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
			t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
		}
	}
}

func inlinedCaller(x int) int {
	x = inlinedCallee(x, 1e5)
	return x
}

func inlinedCallee(x, n int) int {
	return cpuHog0(x, n)
}

//go:noinline
func dumpCallers(pcs []uintptr) {
	if pcs == nil {
		return
	}
	skip := 2 // Callers and dumpCallers
	runtime.Callers(skip, pcs)
}
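
// inlinedCallerDump is kept out of line (//go:noinline) while
// inlinedCalleeDump remains eligible for inlining into it, so the stack
// recorded by dumpCallers contains an inlined frame.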
//go:noinline
func inlinedCallerDump(pcs []uintptr) {
	inlinedCalleeDump(pcs)
}

func inlinedCalleeDump(pcs []uintptr) {
	dumpCallers(pcs)
}

func TestCPUProfileRecursion(t *testing.T) {
	p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
		cpuHogger(recursionCaller, &salt1, dur)
	})

	// Check that the Location encoding was not confused by recursive calls.
	for i, loc := range p.Location {
		recursionFunc := 0
		for _, line := range loc.Line {
			if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
				recursionFunc++
			}
		}
		if recursionFunc > 1 {
			t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
		}
	}
}

func recursionCaller(x int) int {
	y := recursionCallee(3, x)
	return y
}
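
// recursionCallee recurses n levels deep, calling the inlinable
// inlinedCallee at each level, so the resulting samples mix recursive and
// inlined frames.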
func recursionCallee(n, x int) int {
	if n == 0 {
		return 1
	}
	y := inlinedCallee(x, 1e4)
	return y * recursionCallee(n-1, x)
}
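
// recursionChainTop, recursionChainMiddle, and recursionChainBottom form a
// mutually recursive chain; the bottom of each pass records the current
// callers into pcs.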
func recursionChainTop(x int, pcs []uintptr) {
	if x < 0 {
		return
	}
	recursionChainMiddle(x, pcs)
}

func recursionChainMiddle(x int, pcs []uintptr) {
	recursionChainBottom(x, pcs)
}

func recursionChainBottom(x int, pcs []uintptr) {
	// This will be called each time; we only care about the last. We
	// can't make this conditional or this function won't be inlined.
	dumpCallers(pcs)
	recursionChainTop(x-1, pcs)
}
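
// parseProfile parses valBytes as a pprof profile and invokes f once per
// sample with the sample's count, stack, and labels.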
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
	p, err := profile.Parse(bytes.NewReader(valBytes))
	if err != nil {
		t.Fatal(err)
	}
	for _, sample := range p.Sample {
		count := uintptr(sample.Value[0])
		f(count, sample.Location, sample.Label)
	}
	return p
}
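
// cpuProfilingBroken reports whether CPU profiling is known to be broken or
// unimplemented on the current GOOS/GOARCH.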
func cpuProfilingBroken() bool {
	switch runtime.GOOS {
	case "plan9":
		// Profiling unimplemented.
		return true
	case "aix":
		// See https://golang.org/issue/45170.
		return true
	case "ios", "dragonfly", "netbsd", "illumos", "solaris":
		// See https://golang.org/issue/13841.
		return true
	case "openbsd":
		if runtime.GOARCH == "arm" || runtime.GOARCH == "arm64" {
			// See https://golang.org/issue/13841.
			return true
		}
	}
	return false
}

// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
// as interpreted by matches, and returns the parsed profile.
func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
	switch runtime.GOOS {
	case "darwin":
		out, err := exec.Command("uname", "-a").CombinedOutput()
		if err != nil {
			t.Fatal(err)
		}
		vers := string(out)
		t.Logf("uname -a: %v", vers)
	case "plan9":
		t.Skip("skipping on plan9")
	}

	broken := cpuProfilingBroken()

	maxDuration := 5 * time.Second
	if testing.Short() && broken {
		// If it's expected to be broken, no point waiting around.
		maxDuration /= 10
	}

	// If we're running a long test, start with a long duration
	// for tests that try to make sure something *doesn't* happen.
	duration := 5 * time.Second
	if testing.Short() {
		duration = 100 * time.Millisecond
	}

	// Profiling tests are inherently flaky, especially on a
	// loaded system, such as when this test is running with
	// several others under go test std. If a test fails in a way
	// that could mean it just didn't run long enough, try with a
	// longer duration.
	for duration <= maxDuration {
		var prof bytes.Buffer
		if err := StartCPUProfile(&prof); err != nil {
			t.Fatal(err)
		}
		f(duration)
		StopCPUProfile()

		if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
			return p
		}

		duration *= 2
		if duration <= maxDuration {
			t.Logf("retrying with %s duration", duration)
		}
	}

	if broken {
		t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH)
	}

	// Ignore the failure if the tests are running in a QEMU-based emulator;
	// QEMU is not perfect at emulating everything. The IN_QEMU environment
	// variable is set by some of the Go builders. IN_QEMU=1 indicates that
	// the tests are running in QEMU. See issue 9605.
	if os.Getenv("IN_QEMU") == "1" {
		t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
	}
	t.FailNow()
	return nil
}

func contains(slice []string, s string) bool {
	for i := range slice {
		if slice[i] == s {
			return true
		}
	}
	return false
}

// stackContains matches if a function named spec appears anywhere in the stack trace.
func stackContains(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
	for _, loc := range stk {
		for _, line := range loc.Line {
			if strings.Contains(line.Function.Name, spec) {
				return true
			}
		}
	}
	return false
}
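
// matchFunc is the predicate signature shared by stack matchers such as
// stackContains; profileOk applies it to every sample in the profile.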
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
ok = true
// Check that profile is well formed, contains 'need', and does not contain
// anything from 'avoid'.
have := make([]uintptr, len(need))
avoidSamples := make([]uintptr, len(avoid))
var samples uintptr
var buf bytes.Buffer
runtime/pprof: correctly encode inlined functions in CPU profile The pprof profile proto message expects inlined functions of a PC to be encoded in one Location entry using multiple Line entries. https://github.com/google/pprof/blob/5e96527/proto/profile.proto#L177-L184 runtime/pprof has encoded the symbolization information by creating a Location for each PC found in the stack trace and including info from all the frames expanded from the PC using runtime.CallersFrames. This assumes inlined functions are represented as a single PC in the stack trace. (https://go-review.googlesource.com/41256) In the recent years, behavior around inlining and the traceback changed significantly (e.g. https://golang.org/cl/152537, https://golang.org/issue/29582, and many changes). Now the PCs in the stack trace represent user frames even including inline marks. As a result, the profile proto started to allocate a Location entry for each user frame, lose the inline information (so pprof presented incorrect results when inlined functions are involved), and confuse the pprof tool with those PCs made up for inline marks. This CL attempts to detect inlined call frames from the stack traces of CPU profiles, and organize the Location information as intended. Currently, runtime does not provide a reliable and convenient way to detect inlined call frames and expand user frames from a given externally recognizable PCs. So we use heuristics to recover the groups - inlined call frames have nil Func field - inlined call frames will have the same Entry point - but must be careful with recursive functions that have the same Entry point by definition, and non-Go functions that may lack most of the fields of Frame. The followup CL will address the issue with other profile types. Change-Id: I0c9667ab016a3e898d648f31c3f82d84c15398db Reviewed-on: https://go-review.googlesource.com/c/go/+/204636 Reviewed-by: Keith Randall <khr@golang.org>
2019-11-02 17:16:36 +09:00
p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk)
samples += count
runtime: handle morestack system stack transition in gentraceback gentraceback handles system stack transitions, but only when they're done by systemstack(). Handle morestack() too. I tried to do this generically but systemstack and morestack are actually *very* different functions. Most notably, systemstack returns "normally", just messes with $sp along the way. morestack never returns at all -- it calls newstack, and newstack then jumps both stacks and functions back to whoever called morestack. I couldn't think of a way to handle both of them generically. So don't. The current implementation does not include systemstack on the generated traceback. That's partly because I don't know how to find its stack frame reliably, and partly because the current structure of the code wants to do the transition before the call, not after. If we're willing to assume that morestack's stack frame is 0 size, this could probably be fixed. For posterity's sake, alternatives tried: - Have morestack put a dummy function into schedbuf, like systemstack does. This actually worked (see patchset 1) but more by a series of coincidences than by deliberate design. The biggest coincidence was that because morestack_switch was a RET and its stack was 0 size, it actually worked to jump back to it at the end of newstack -- it would just return to the caller of morestack. Way too subtle for me, and also a little slower than just jumping directly. - Put morestack's PC and SP into schedbuf, so that gentraceback could treat it like a normal function except for the changing SP. This was a terrible idea and caused newstack to reenter morestack in a completely unreasonable state. To make testing possible I did a small redesign of testCPUProfile to take a callback that defines how to check if the conditions pass to it are satisfied. This seemed better than making the syntax of the "need" strings even more complicated. Updates #25943 Change-Id: I9271a30a976f80a093a3d4d1c7e9ec226faf74b4 Reviewed-on: https://go-review.googlesource.com/126795 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2018-08-17 16:32:08 -04:00
for i, spec := range need {
if matches(spec, count, stk, labels) {
have[i] += count
}
}
for i, name := range avoid {
for _, loc := range stk {
for _, line := range loc.Line {
if strings.Contains(line.Function.Name, name) {
avoidSamples[i] += count
}
}
}
}
fmt.Fprintf(&buf, "\n")
})
t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String())
if samples < 10 && runtime.GOOS == "windows" {
// On some windows machines we end up with
// not enough samples due to coarse timer
// resolution. Let it go.
t.Log("too few samples on Windows (golang.org/issue/10842)")
runtime/pprof: correctly encode inlined functions in CPU profile The pprof profile proto message expects inlined functions of a PC to be encoded in one Location entry using multiple Line entries. https://github.com/google/pprof/blob/5e96527/proto/profile.proto#L177-L184 runtime/pprof has encoded the symbolization information by creating a Location for each PC found in the stack trace and including info from all the frames expanded from the PC using runtime.CallersFrames. This assumes inlined functions are represented as a single PC in the stack trace. (https://go-review.googlesource.com/41256) In the recent years, behavior around inlining and the traceback changed significantly (e.g. https://golang.org/cl/152537, https://golang.org/issue/29582, and many changes). Now the PCs in the stack trace represent user frames even including inline marks. As a result, the profile proto started to allocate a Location entry for each user frame, lose the inline information (so pprof presented incorrect results when inlined functions are involved), and confuse the pprof tool with those PCs made up for inline marks. This CL attempts to detect inlined call frames from the stack traces of CPU profiles, and organize the Location information as intended. Currently, runtime does not provide a reliable and convenient way to detect inlined call frames and expand user frames from a given externally recognizable PCs. So we use heuristics to recover the groups - inlined call frames have nil Func field - inlined call frames will have the same Entry point - but must be careful with recursive functions that have the same Entry point by definition, and non-Go functions that may lack most of the fields of Frame. The followup CL will address the issue with other profile types. Change-Id: I0c9667ab016a3e898d648f31c3f82d84c15398db Reviewed-on: https://go-review.googlesource.com/c/go/+/204636 Reviewed-by: Keith Randall <khr@golang.org>
2019-11-02 17:16:36 +09:00
return p, false
}
// Check that we got a reasonable number of samples.
// We used to always require at least ideal/4 samples,
// but that is too hard to guarantee on a loaded system.
// Now we accept 10 or more samples, which we take to be
// enough to show that at least some profiling is occurring.
if ideal := uintptr(duration * 100 / time.Second); samples == 0 || (samples < ideal/4 && samples < 10) {
t.Logf("too few samples; got %d, want at least %d, ideally %d", samples, ideal/4, ideal)
ok = false
}
for i, name := range avoid {
bad := avoidSamples[i]
if bad != 0 {
t.Logf("found %d samples in avoid-function %s\n", bad, name)
ok = false
}
}
if len(need) == 0 {
return p, ok
}
var total uintptr
for i, name := range need {
total += have[i]
t.Logf("%s: %d\n", name, have[i])
}
if total == 0 {
t.Logf("no samples in expected functions")
ok = false
}
// We'd like to check a reasonable minimum, like
// total / len(have) / smallconstant, but this test is
// pretty flaky (see bug 7095). So we'll just test to
// make sure we got at least one sample.
min := uintptr(1)
for i, name := range need {
if have[i] < min {
t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have)))
ok = false
}
}
return p, ok
}
// Fork can hang if preempted with signals frequently enough (see issue 5517).
// Ensure that we do not do this.
func TestCPUProfileWithFork(t *testing.T) {
testenv.MustHaveExec(t)
heap := 1 << 30
if runtime.GOOS == "android" {
// Use smaller size for Android to avoid crash.
heap = 100 << 20
}
if runtime.GOOS == "windows" && runtime.GOARCH == "arm" {
// Use smaller heap for Windows/ARM to avoid crash.
heap = 100 << 20
}
if testing.Short() {
heap = 100 << 20
}
// This makes fork slower.
garbage := make([]byte, heap)
// Need to touch the slice, otherwise it won't be paged in.
done := make(chan bool)
go func() {
for i := range garbage {
garbage[i] = 42
}
done <- true
}()
<-done
var prof bytes.Buffer
if err := StartCPUProfile(&prof); err != nil {
t.Fatal(err)
}
defer StopCPUProfile()
for i := 0; i < 10; i++ {
exec.Command(os.Args[0], "-h").CombinedOutput()
}
}
// Test that profiler does not observe runtime.gogo as "user" goroutine execution.
// If it did, it would see inconsistent state and would either record an incorrect stack
// or crash because the stack was malformed.
func TestGoroutineSwitch(t *testing.T) {
if runtime.Compiler == "gccgo" {
t.Skip("not applicable for gccgo")
}
// How much to try. These defaults take about 1 second
// on a 2012 MacBook Pro. The ones in short mode take
// about 0.1 seconds.
tries := 10
count := 1000000
if testing.Short() {
tries = 1
}
for try := 0; try < tries; try++ {
var prof bytes.Buffer
if err := StartCPUProfile(&prof); err != nil {
t.Fatal(err)
}
for i := 0; i < count; i++ {
runtime.Gosched()
}
StopCPUProfile()
// Read profile to look for entries for gogo with an attempt at a traceback.
// "runtime.gogo" is OK, because that's the part of the context switch
// before the actual switch begins. But we should not see "gogo",
// aka "gogo<>(SB)", which does the actual switch and is marked SPWRITE.
parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, _ map[string][]string) {
// An entry with two frames whose top frame is one of the pseudo-functions
// runtime._System, runtime._ExternalCode, or runtime._GC exists to record
// a PC without a traceback. Those are okay.
if len(stk) == 2 {
name := stk[1].Line[0].Function.Name
if name == "runtime._System" || name == "runtime._ExternalCode" || name == "runtime._GC" {
return
}
}
// An entry with just one frame is OK too:
// it knew to stop at gogo.
if len(stk) == 1 {
return
}
// Otherwise, should not see gogo.
// The place we'd see it would be the innermost frame.
name := stk[0].Line[0].Function.Name
if name == "gogo" {
var buf bytes.Buffer
fprintStack(&buf, stk)
t.Fatalf("found profile entry for gogo:\n%s", buf.String())
}
})
}
}
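// fprintStack writes stk to w in a compact one-line form: each location's
// address followed, in parentheses, by its function:line frames (more than
// one when calls were inlined into that location).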
func fprintStack(w io.Writer, stk []*profile.Location) {
for _, loc := range stk {
fmt.Fprintf(w, " %#x", loc.Address)
fmt.Fprintf(w, " (")
for i, line := range loc.Line {
if i > 0 {
fmt.Fprintf(w, " ")
}
fmt.Fprintf(w, "%s:%d", line.Function.Name, line.Line)
}
fmt.Fprintf(w, ")")
}
fmt.Fprintf(w, "\n")
}
// Test that profiling of division operations is okay, especially on ARM. See issue 6681.
func TestMathBigDivide(t *testing.T) {
testCPUProfile(t, nil, nil, nil, func(duration time.Duration) {
t := time.After(duration)
pi := new(big.Int)
for {
for i := 0; i < 100; i++ {
n := big.NewInt(2646693125139304345)
d := big.NewInt(842468587426513207)
pi.Div(n, d)
}
select {
case <-t:
return
default:
}
}
})
}
// stackContainsAll matches if all functions in spec (comma-separated) appear somewhere in the stack trace.
func stackContainsAll(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
for _, f := range strings.Split(spec, ",") {
if !stackContains(f, count, stk, labels) {
return false
}
}
return true
}
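// TestMorestack checks that samples taken while the stack is being grown
// are attributed correctly: profile stacks should contain runtime.newstack
// together with the user function (growstack) whose call triggered it.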
func TestMorestack(t *testing.T) {
testCPUProfile(t, stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions(), func(duration time.Duration) {
t := time.After(duration)
c := make(chan bool)
for {
go func() {
growstack1()
c <- true
}()
select {
case <-t:
return
case <-c:
}
}
})
}
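// growstack1 kicks off a recursion that repeatedly grows the stack. It is
// marked noinline so it shows up as its own frame in the profile.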
//go:noinline
func growstack1() {
growstack(10)
}
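// growstack recurses n times with a large (2 MiB) frame at every level,
// forcing the runtime to grow the goroutine's stack via morestack/newstack.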
//go:noinline
func growstack(n int) {
var buf [8 << 18]byte
use(buf)
if n > 0 {
growstack(n - 1)
}
}
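// use takes buf by value solely to keep growstack's buffer (and hence its
// large frame) from being optimized away.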
//go:noinline
func use(x [8 << 18]byte) {}
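// TestBlockProfile checks that blocking on channel operations, select,
// mutexes, and condition variables is all recorded in the block profile,
// in both the debug=1 text format and the proto format.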
func TestBlockProfile(t *testing.T) {
type TestCase struct {
name string
f func()
stk []string
re string
}
tests := [...]TestCase{
{
name: "chan recv",
f: blockChanRecv,
stk: []string{
"runtime.chanrecv1",
"runtime/pprof.blockChanRecv",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{
name: "chan send",
f: blockChanSend,
stk: []string{
"runtime.chansend1",
"runtime/pprof.blockChanSend",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{
name: "chan close",
f: blockChanClose,
stk: []string{
"runtime.chanrecv1",
"runtime/pprof.blockChanClose",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{
name: "select recv async",
f: blockSelectRecvAsync,
stk: []string{
"runtime.selectgo",
"runtime/pprof.blockSelectRecvAsync",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{
name: "select send sync",
f: blockSelectSendSync,
stk: []string{
"runtime.selectgo",
"runtime/pprof.blockSelectSendSync",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{
name: "mutex",
f: blockMutex,
stk: []string{
"sync.(*Mutex).Lock",
"runtime/pprof.blockMutex",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*/src/sync/mutex\.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{
name: "cond",
f: blockCond,
stk: []string{
"sync.(*Cond).Wait",
"runtime/pprof.blockCond",
"runtime/pprof.TestBlockProfile",
},
re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*/src/sync/cond\.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
}
// Generate block profile
runtime.SetBlockProfileRate(1)
defer runtime.SetBlockProfileRate(0)
for _, test := range tests {
test.f()
}
t.Run("debug=1", func(t *testing.T) {
var w bytes.Buffer
Lookup("block").WriteTo(&w, 1)
prof := w.String()
if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
t.Fatalf("Bad profile header:\n%v", prof)
}
if strings.HasSuffix(prof, "#\t0x0\n\n") {
t.Errorf("Useless 0 suffix:\n%v", prof)
}
for _, test := range tests {
if !regexp.MustCompile(strings.ReplaceAll(test.re, "\t", "\t+")).MatchString(prof) {
t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
}
}
})
t.Run("proto", func(t *testing.T) {
// proto format
var w bytes.Buffer
Lookup("block").WriteTo(&w, 0)
p, err := profile.Parse(&w)
if err != nil {
t.Fatalf("failed to parse profile: %v", err)
}
t.Logf("parsed proto: %s", p)
if err := p.CheckValid(); err != nil {
t.Fatalf("invalid profile: %v", err)
}
stks := stacks(p)
for _, test := range tests {
if !containsStack(stks, test.stk) {
t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk)
}
}
})
}
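// stacks flattens p into one stack trace per sample, each given as function
// names from leaf to root, expanding inlined frames via the Line entries of
// every Location.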
func stacks(p *profile.Profile) (res [][]string) {
for _, s := range p.Sample {
var stk []string
for _, l := range s.Location {
for _, line := range l.Line {
stk = append(stk, line.Function.Name)
}
}
res = append(res, stk)
}
return res
}
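// containsStack reports whether any stack in got has want as a prefix,
// comparing frames in order from the leaf.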
func containsStack(got [][]string, want []string) bool {
for _, stk := range got {
if len(stk) < len(want) {
continue
}
for i, f := range want {
if f != stk[i] {
break
}
if i == len(want)-1 {
return true
}
}
}
return false
}
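// blockDelay is how long the block* helpers below stay blocked.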
const blockDelay = 10 * time.Millisecond
func blockChanRecv() {
c := make(chan bool)
go func() {
time.Sleep(blockDelay)
c <- true
}()
<-c
}
func blockChanSend() {
c := make(chan bool)
go func() {
time.Sleep(blockDelay)
<-c
}()
c <- true
}
func blockChanClose() {
c := make(chan bool)
go func() {
time.Sleep(blockDelay)
close(c)
}()
<-c
}
func blockSelectRecvAsync() {
const numTries = 3
c := make(chan bool, 1)
c2 := make(chan bool, 1)
go func() {
for i := 0; i < numTries; i++ {
time.Sleep(blockDelay)
c <- true
}
}()
for i := 0; i < numTries; i++ {
select {
case <-c:
case <-c2:
}
}
}
func blockSelectSendSync() {
c := make(chan bool)
c2 := make(chan bool)
go func() {
time.Sleep(blockDelay)
<-c
}()
select {
case c <- true:
case c2 <- true:
}
}
func blockMutex() {
var mu sync.Mutex
mu.Lock()
go func() {
time.Sleep(blockDelay)
mu.Unlock()
}()
// Note: Unlock releases mu before recording the mutex event,
// so it's theoretically possible for this to proceed and
// capture the profile before the event is recorded. As long
// as this is blocked before the unlock happens, it's okay.
mu.Lock()
}
func blockCond() {
var mu sync.Mutex
c := sync.NewCond(&mu)
mu.Lock()
go func() {
time.Sleep(blockDelay)
mu.Lock()
c.Signal()
mu.Unlock()
}()
c.Wait()
mu.Unlock()
}
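// The helpers above only generate events; as a minimal usage sketch (an
// illustration added here, not one of the original tests), a program would
// capture a block profile like this. The function name is hypothetical.
func exampleWriteBlockProfile(w io.Writer) error {
runtime.SetBlockProfileRate(1) // record every blocking event
defer runtime.SetBlockProfileRate(0)
blockMutex() // produce at least one event
return Lookup("block").WriteTo(w, 1) // debug=1: human-readable text
}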
// TestBlockProfileBias checks that short, frequent blocking events and long,
// infrequent ones are reported without systematic bias relative to each
// other. See http://golang.org/cl/299991.
func TestBlockProfileBias(t *testing.T) {
rate := 1000 // arbitrary value
runtime.SetBlockProfileRate(rate)
defer runtime.SetBlockProfileRate(0)
// simulate blocking events
blockFrequentShort(rate)
blockInfrequentLong(rate)
var w bytes.Buffer
Lookup("block").WriteTo(&w, 0)
p, err := profile.Parse(&w)
if err != nil {
t.Fatalf("failed to parse profile: %v", err)
}
t.Logf("parsed proto: %s", p)
il := float64(-1) // blockInfrequentLong duration
fs := float64(-1) // blockFrequentShort duration
for _, s := range p.Sample {
for _, l := range s.Location {
for _, line := range l.Line {
if len(s.Value) < 2 {
t.Fatal("block profile has less than 2 sample types")
}
if line.Function.Name == "runtime/pprof.blockInfrequentLong" {
il = float64(s.Value[1])
} else if line.Function.Name == "runtime/pprof.blockFrequentShort" {
fs = float64(s.Value[1])
}
}
}
}
if il == -1 || fs == -1 {
t.Fatal("block profile is missing expected functions")
}
// threshold is 10x the standard deviation of the bias observed over 100 runs
// on a local machine.
const threshold = 0.2
if bias := (il - fs) / il; math.Abs(bias) > threshold {
t.Fatalf("bias: abs(%f) > %f", bias, threshold)
} else {
t.Logf("bias: abs(%f) < %f", bias, threshold)
}
}
// blockFrequentShort produces 100000 block events with an average duration of
// rate / 10.
func blockFrequentShort(rate int) {
for i := 0; i < 100000; i++ {
blockevent(int64(rate/10), 1)
}
}
// blockInfrequentLong produces 10000 block events with an average duration of
// rate.
func blockInfrequentLong(rate int) {
for i := 0; i < 10000; i++ {
blockevent(int64(rate), 1)
}
}
// Used by TestBlockProfileBias.
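// blockevent is the runtime's internal routine for recording a blocking
// event of the given duration (in cycles) at the caller; go:linkname exposes
// it so the test can synthesize events of exact durations without blocking.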
//go:linkname blockevent runtime.blockevent
func blockevent(cycles int64, skip int)
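// TestMutexProfile checks that contended mutex unlocks are recorded in the
// mutex profile, in both the debug=1 text format and the proto format.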
func TestMutexProfile(t *testing.T) {
// Generate mutex profile
old := runtime.SetMutexProfileFraction(1)
defer runtime.SetMutexProfileFraction(old)
if old != 0 {
t.Fatalf("need MutexProfileRate 0, got %d", old)
}
blockMutex()
t.Run("debug=1", func(t *testing.T) {
var w bytes.Buffer
Lookup("mutex").WriteTo(&w, 1)
prof := w.String()
t.Logf("received profile: %v", prof)
if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
t.Errorf("Bad profile header:\n%v", prof)
}
prof = strings.Trim(prof, "\n")
lines := strings.Split(prof, "\n")
if len(lines) != 6 {
t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
}
if len(lines) < 6 {
return
}
// Check that the stack record line looks like
// "35258904 1 @ 0x48288d 0x47cd28 0x458931".
r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+`
if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
t.Errorf("%q didn't match %q", lines[3], r2)
}
r3 := "^#.*runtime/pprof.blockMutex.*$"
if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
t.Errorf("%q didn't match %q", lines[5], r3)
}
t.Log(prof)
})
t.Run("proto", func(t *testing.T) {
// proto format
var w bytes.Buffer
Lookup("mutex").WriteTo(&w, 0)
p, err := profile.Parse(&w)
if err != nil {
t.Fatalf("failed to parse profile: %v", err)
}
t.Logf("parsed proto: %s", p)
if err := p.CheckValid(); err != nil {
t.Fatalf("invalid profile: %v", err)
}
stks := stacks(p)
for _, want := range [][]string{
{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"},
} {
if !containsStack(stks, want) {
t.Errorf("No matching stack entry for %+v", want)
}
}
})
}
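// As with the block profile above, a minimal usage sketch (an illustration
// added here, not one of the original tests) for capturing a mutex profile.
// The function name is hypothetical.
func exampleWriteMutexProfile(w io.Writer) error {
old := runtime.SetMutexProfileFraction(1) // sample every contention event
defer runtime.SetMutexProfileFraction(old)
blockMutex() // produce at least one contended unlock
return Lookup("mutex").WriteTo(w, 1) // debug=1: human-readable text
}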
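// func1 through func4 block on a channel receive; they are distinct
// functions so that goroutines parked in each yield distinct stacks in the
// goroutine profile.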
func func1(c chan int) { <-c }
func func2(c chan int) { <-c }
func func3(c chan int) { <-c }
func func4(c chan int) { <-c }
func TestGoroutineCounts(t *testing.T) {
// Setting GOMAXPROCS to 1 ensures we can force all goroutines to the
// desired blocking point.
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
c := make(chan int)
for i := 0; i < 100; i++ {
switch {
case i%10 == 0:
go func1(c)
case i%2 == 0:
go func2(c)
default:
go func3(c)
}
// Let goroutines block on channel
for j := 0; j < 5; j++ {
runtime.Gosched()
}
}
ctx := context.Background()
// ... and again, with labels this time (just with fewer iterations to keep
// sorting deterministic).
Do(ctx, Labels("label", "value"), func(context.Context) {
for i := 0; i < 89; i++ {
switch {
case i%10 == 0:
go func1(c)
case i%2 == 0:
go func2(c)
default:
go func3(c)
}
// Let goroutines block on channel
for j := 0; j < 5; j++ {
runtime.Gosched()
}
}
})
var w bytes.Buffer
goroutineProf := Lookup("goroutine")
// Check debug profile
goroutineProf.WriteTo(&w, 1)
prof := w.String()
labels := labelMap{"label": "value"}
labelStr := "\n# labels: " + labels.String()
if !containsInOrder(prof, "\n50 @ ", "\n44 @", labelStr,
"\n40 @", "\n36 @", labelStr, "\n10 @", "\n9 @", labelStr, "\n1 @") {
t.Errorf("expected sorted goroutine counts with Labels:\n%s", prof)
}
// Check proto profile
w.Reset()
goroutineProf.WriteTo(&w, 0)
p, err := profile.Parse(&w)
if err != nil {
t.Errorf("error parsing protobuf profile: %v", err)
}
if err := p.CheckValid(); err != nil {
t.Errorf("protobuf profile is invalid: %v", err)
}
expectedLabels := map[int64]map[string]string{
50: {},
44: {"label": "value"},
40: {},
36: {"label": "value"},
10: {},
9:  {"label": "value"},
1:  {},
}
if !containsCountsLabels(p, expectedLabels) {
t.Errorf("expected count profile to contain goroutines with counts and labels %v, got %v",
expectedLabels, p)
}
close(c)
time.Sleep(10 * time.Millisecond) // let goroutines exit
}
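// containsInOrder reports whether all of the given substrings appear in s in
// order, each match beginning after the end of the previous one.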
func containsInOrder(s string, all ...string) bool {
for _, t := range all {
i := strings.Index(s, t)
if i < 0 {
return false
}
s = s[i+len(t):]
}
return true
}
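// containsCountsLabels reports whether prof contains a sample for each
// expected goroutine count and, for the samples with each count, the
// expected label key/value pairs.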
func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[string]string) bool {
m := make(map[int64]int)
type nkey struct {
count int64
key, val string
}
n := make(map[nkey]int)
for c, kv := range countLabels {
m[c]++
for k, v := range kv {
n[nkey{
count: c,
key: k,
val: v,
}]++
}
}
for _, s := range prof.Sample {
// The count is the single value in the sample
if len(s.Value) != 1 {
return false
}
m[s.Value[0]]--
for k, vs := range s.Label {
for _, v := range vs {
n[nkey{
count: s.Value[0],
key: k,
val: v,
}]--
}
}
}
for _, n := range m {
if n > 0 {
return false
}
}
for _, ncnt := range n {
if ncnt != 0 {
return false
}
}
return true
}
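// exampleContainsCountsLabels is an illustrative sketch, not part of the
// upstream tests (the function name is hypothetical): a single sample with
// count 2 carrying the label foo=bar satisfies an expectation of
// {2: {"foo": "bar"}}, so this returns true.
func exampleContainsCountsLabels() bool {
	p := &profile.Profile{
		Sample: []*profile.Sample{
			{Value: []int64{2}, Label: map[string][]string{"foo": {"bar"}}},
		},
	}
	return containsCountsLabels(p, map[int64]map[string]string{2: {"foo": "bar"}})
}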
var emptyCallStackTestRun int64
// Issue 18836.
func TestEmptyCallStack(t *testing.T) {
name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun)
emptyCallStackTestRun++
t.Parallel()
var buf bytes.Buffer
p := NewProfile(name)
p.Add("foo", 47674)
p.WriteTo(&buf, 1)
p.Remove("foo")
got := buf.String()
prefix := name + " profile: total 1\n"
if !strings.HasPrefix(got, prefix) {
t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix)
}
lostevent := "lostProfileEvent"
if !strings.Contains(got, lostevent) {
t.Fatalf("got:\n\t%q\ndoes not contain:\n\t%q\n", got, lostevent)
}
}
// stackContainsLabeled takes a spec like funcname;key=value and matches if
// the sample carries a label with that key and value and funcname appears
// somewhere in the stack.
func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
semi := strings.Index(spec, ";")
if semi == -1 {
panic("no semicolon in key/value spec")
}
kv := strings.SplitN(spec[semi+1:], "=", 2)
if len(kv) != 2 {
panic("missing = in key/value spec")
}
if !contains(labels[kv[0]], kv[1]) {
return false
}
return stackContains(spec[:semi], count, stk, labels)
}
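// exampleStackContainsLabeled is an illustrative sketch, not part of the
// upstream tests (the function name is hypothetical). The spec splits at the
// first semicolon: the left half is matched against the function names in
// the stack, and the right half must be a label on the sample.
func exampleStackContainsLabeled() bool {
	labels := map[string][]string{"key": {"value"}}
	stk := []*profile.Location{{
		Line: []profile.Line{{Function: &profile.Function{Name: "runtime/pprof.cpuHogger"}}},
	}}
	// Should match: the label is present and stackContains (defined earlier
	// in this file) finds the function name in the Location's Line entries.
	return stackContainsLabeled("runtime/pprof.cpuHogger;key=value", 1, stk, labels)
}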
func TestCPUProfileLabel(t *testing.T) {
testCPUProfile(t, stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
cpuHogger(cpuHog1, &salt1, dur)
})
})
}
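// exampleDoLabels is an illustrative sketch, not part of the upstream tests
// (the function name is hypothetical): the labels installed by Do are
// visible inside the callback via ForLabels, not only to the profiler.
func exampleDoLabels() {
	Do(context.Background(), Labels("key", "value"), func(ctx context.Context) {
		ForLabels(ctx, func(k, v string) bool {
			fmt.Printf("%s=%s\n", k, v) // prints "key=value"
			return true                 // keep iterating
		})
	})
}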
func TestLabelRace(t *testing.T) {
// Test the race detector annotations for synchronization
	// between setting labels and consuming them from the
// profile.
testCPUProfile(t, stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, nil, func(dur time.Duration) {
start := time.Now()
var wg sync.WaitGroup
for time.Since(start) < dur {
var salts [10]int
for i := 0; i < 10; i++ {
wg.Add(1)
go func(j int) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
cpuHogger(cpuHog1, &salts[j], time.Millisecond)
})
wg.Done()
}(i)
}
wg.Wait()
}
})
}
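// exampleSetGoroutineLabels is an illustrative sketch, not part of the
// upstream tests (the function name is hypothetical). Do, exercised above,
// builds on WithLabels and SetGoroutineLabels; the race-detector
// annotations this test exercises sit underneath SetGoroutineLabels.
func exampleSetGoroutineLabels(ctx context.Context) {
	ctx = WithLabels(ctx, Labels("worker", "1"))
	SetGoroutineLabels(ctx) // CPU profile samples now carry worker=1
}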
// Check that there is no deadlock when the program receives SIGPROF while in
// the critical section of 64-bit atomics. Used to happen on mips{,le}. See #20146.
func TestAtomicLoadStore64(t *testing.T) {
f, err := os.CreateTemp("", "profatomic")
if err != nil {
		t.Fatalf("CreateTemp: %v", err)
}
defer os.Remove(f.Name())
defer f.Close()
if err := StartCPUProfile(f); err != nil {
t.Fatal(err)
}
defer StopCPUProfile()
var flag uint64
done := make(chan bool, 1)
go func() {
for atomic.LoadUint64(&flag) == 0 {
runtime.Gosched()
}
done <- true
}()
time.Sleep(50 * time.Millisecond)
atomic.StoreUint64(&flag, 1)
<-done
}
func TestTracebackAll(t *testing.T) {
// With gccgo, if a profiling signal arrives at the wrong time
// during traceback, it may crash or hang. See issue #29448.
f, err := os.CreateTemp("", "proftraceback")
if err != nil {
		t.Fatalf("CreateTemp: %v", err)
}
defer os.Remove(f.Name())
defer f.Close()
if err := StartCPUProfile(f); err != nil {
t.Fatal(err)
}
defer StopCPUProfile()
ch := make(chan int)
defer close(ch)
count := 10
for i := 0; i < count; i++ {
go func() {
<-ch // block
}()
}
N := 10000
if testing.Short() {
N = 500
}
buf := make([]byte, 10*1024)
for i := 0; i < N; i++ {
runtime.Stack(buf, true)
}
}
// TestTryAdd tests the cases that are hard to test with real program execution.
//
// For example, the current Go compilers may not always inline functions
// involved in recursion, but that may change in future compilers. This test
// covers such cases by using fake call sequences to force the profile build,
// via translateCPUProfile defined in proto_test.go.
func TestTryAdd(t *testing.T) {
if _, found := findInlinedCall(inlinedCallerDump, 4<<10); !found {
t.Skip("Can't determine whether anything was inlined into inlinedCallerDump.")
}
// inlinedCallerDump
// inlinedCalleeDump
pcs := make([]uintptr, 2)
inlinedCallerDump(pcs)
inlinedCallerStack := make([]uint64, 2)
for i := range pcs {
inlinedCallerStack[i] = uint64(pcs[i])
}
if _, found := findInlinedCall(recursionChainBottom, 4<<10); !found {
t.Skip("Can't determine whether anything was inlined into recursionChainBottom.")
}
// recursionChainTop
// recursionChainMiddle
// recursionChainBottom
// recursionChainTop
// recursionChainMiddle
// recursionChainBottom
pcs = make([]uintptr, 6)
recursionChainTop(1, pcs)
recursionStack := make([]uint64, len(pcs))
for i := range pcs {
recursionStack[i] = uint64(pcs[i])
}
period := int64(2000 * 1000) // 1/500*1e9 nanosec.
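	// Each fake record below follows the layout assumed by
	// profileBuilder.addCPUData: {len, time, count, pc...}, where len counts
	// every word in the record and the time word is unused in these fake
	// inputs, so it is always 0. The leading {3, 0, 500} record carries the
	// sampling rate in hz instead of a count, and an overflow record is
	// encoded as count == 0 with a single "pc" word holding the number of
	// lost events (see the bug38096 case below).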
testCases := []struct {
name string
input []uint64 // following the input format assumed by profileBuilder.addCPUData.
wantLocs [][]string // ordered location entries with function names.
wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs.
}{{
// Sanity test for a normal, complete stack trace.
name: "full_stack_trace",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
},
wantLocs: [][]string{
{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
},
wantSamples: []*profile.Sample{
{Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
name: "bug35538",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
// Fake frame: tryAdd will have inlinedCallerDump
// (stack[1]) on the deck when it encounters the next
// inline function. It should accept this.
7, 0, 10, inlinedCallerStack[0], inlinedCallerStack[1], inlinedCallerStack[0], inlinedCallerStack[1],
5, 0, 20, inlinedCallerStack[0], inlinedCallerStack[1],
},
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
{Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
name: "bug38096",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
// count (data[2]) == 0 && len(stk) == 1 is an overflow
// entry. The "stk" entry is actually the count.
4, 0, 0, 4242,
},
wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}},
wantSamples: []*profile.Sample{
{Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
// If a function is directly called recursively then it must
// not be inlined in the caller.
//
// N.B. We're generating an impossible profile here, with a
		// recursive inlinedCalleeDump call. This is simulating a non-Go
// function that looks like an inlined Go function other than
// its recursive property. See pcDeck.tryAdd.
name: "directly_recursive_func_is_not_inlined",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
5, 0, 30, inlinedCallerStack[0], inlinedCallerStack[0],
4, 0, 40, inlinedCallerStack[0],
},
// inlinedCallerDump shows up here because
// runtime_expandFinalInlineFrame adds it to the stack frame.
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump"}, {"runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
{Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
},
}, {
name: "recursion_chain_inline",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
9, 0, 10, recursionStack[0], recursionStack[1], recursionStack[2], recursionStack[3], recursionStack[4], recursionStack[5],
},
wantLocs: [][]string{
{"runtime/pprof.recursionChainBottom"},
{
"runtime/pprof.recursionChainMiddle",
"runtime/pprof.recursionChainTop",
"runtime/pprof.recursionChainBottom",
},
{
"runtime/pprof.recursionChainMiddle",
"runtime/pprof.recursionChainTop",
"runtime/pprof.TestTryAdd", // inlined into the test.
},
},
wantSamples: []*profile.Sample{
{Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}, {ID: 3}}},
},
}, {
name: "truncated_stack_trace_later",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
4, 0, 60, inlinedCallerStack[0],
},
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
{Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
name: "truncated_stack_trace_first",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
4, 0, 70, inlinedCallerStack[0],
5, 0, 80, inlinedCallerStack[0], inlinedCallerStack[1],
},
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
// We can recover the inlined caller from a truncated stack.
name: "truncated_stack_trace_only",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
4, 0, 70, inlinedCallerStack[0],
},
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
// The same location is used for duplicated stacks.
name: "truncated_stack_trace_twice",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
4, 0, 70, inlinedCallerStack[0],
// Fake frame: add a fake call to
// inlinedCallerDump to prevent this sample
			// from being merged into the sample above.
5, 0, 80, inlinedCallerStack[1], inlinedCallerStack[0],
},
wantLocs: [][]string{
{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
{"runtime/pprof.inlinedCallerDump"},
},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
},
}}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
p, err := translateCPUProfile(tc.input)
if err != nil {
t.Fatalf("translating profile: %v", err)
}
t.Logf("Profile: %v\n", p)
// One location entry with all inlined functions.
var gotLoc [][]string
for _, loc := range p.Location {
var names []string
for _, line := range loc.Line {
names = append(names, line.Function.Name)
}
gotLoc = append(gotLoc, names)
}
if got, want := fmtJSON(gotLoc), fmtJSON(tc.wantLocs); got != want {
t.Errorf("Got Location = %+v\n\twant %+v", got, want)
}
// All samples should point to one location.
var gotSamples []*profile.Sample
for _, sample := range p.Sample {
var locs []*profile.Location
for _, loc := range sample.Location {
locs = append(locs, &profile.Location{ID: loc.ID})
}
gotSamples = append(gotSamples, &profile.Sample{Value: sample.Value, Location: locs})
}
if got, want := fmtJSON(gotSamples), fmtJSON(tc.wantSamples); got != want {
t.Errorf("Got Samples = %+v\n\twant %+v", got, want)
}
})
}
}
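// exampleInlinedLocation is an illustrative sketch, not part of the upstream
// tests (the function name is hypothetical). It mirrors what wantLocs entries
// like {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}
// assert: an inlined callee and its caller share one Location whose Line
// slice lists the callee first.
func exampleInlinedLocation() *profile.Location {
	return &profile.Location{
		ID: 1,
		Line: []profile.Line{
			{Function: &profile.Function{Name: "runtime/pprof.inlinedCalleeDump"}},
			{Function: &profile.Function{Name: "runtime/pprof.inlinedCallerDump"}},
		},
	}
}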