mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime,runtime/pprof: clean up goroutine leak profile writing
Cleaned up goroutine leak profile extraction:
- removed the acquisition of goroutineProfile semaphore
- inlined the call to saveg when recording stacks instead of using
doRecordGoroutineProfile, which had side-effects over
goroutineProfile fields.
Added regression tests for goroutine leak profiling frontend for binary
and debug=1 profile formats.
Added stress tests for concurrent goroutine and goroutine leak profile requests.
Change-Id: I55c1bcef11e9a7fb7699b4c5a2353e594d3e7173
GitHub-Last-Rev: 5e9eb3b1d8
GitHub-Pull-Request: golang/go#76045
Reviewed-on: https://go-review.googlesource.com/c/go/+/714580
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
b8b84b789e
commit
8873e8bea2
3 changed files with 253 additions and 16 deletions
|
|
@ -1261,7 +1261,7 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P
|
||||||
|
|
||||||
//go:linkname pprof_goroutineLeakProfileWithLabels
|
//go:linkname pprof_goroutineLeakProfileWithLabels
|
||||||
func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
|
func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
|
||||||
return goroutineLeakProfileWithLabelsConcurrent(p, labels)
|
return goroutineLeakProfileWithLabels(p, labels)
|
||||||
}
|
}
|
||||||
|
|
||||||
// labels may be nil. If labels is non-nil, it must have the same length as p.
|
// labels may be nil. If labels is non-nil, it must have the same length as p.
|
||||||
|
|
@ -1323,30 +1323,26 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab
|
||||||
return work.goroutineLeak.count, false
|
return work.goroutineLeak.count, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the same semaphore as goroutineProfileWithLabelsConcurrent,
|
|
||||||
// because ultimately we still use goroutine profiles.
|
|
||||||
semacquire(&goroutineProfile.sema)
|
|
||||||
|
|
||||||
// Unlike in goroutineProfileWithLabelsConcurrent, we don't need to
|
|
||||||
// save the current goroutine stack, because it is obviously not leaked.
|
|
||||||
|
|
||||||
pcbuf := makeProfStack() // see saveg() for explanation
|
pcbuf := makeProfStack() // see saveg() for explanation
|
||||||
|
|
||||||
// Prepare a profile large enough to store all leaked goroutines.
|
// Prepare a profile large enough to store all leaked goroutines.
|
||||||
n = work.goroutineLeak.count
|
n = work.goroutineLeak.count
|
||||||
|
|
||||||
if n > len(p) {
|
if n > len(p) {
|
||||||
// There's not enough space in p to store the whole profile, so (per the
|
// There's not enough space in p to store the whole profile, so
|
||||||
// contract of runtime.GoroutineProfile) we're not allowed to write to p
|
// we're not allowed to write to p at all and must return n, false.
|
||||||
// at all and must return n, false.
|
|
||||||
semrelease(&goroutineProfile.sema)
|
|
||||||
return n, false
|
return n, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Visit each leaked goroutine and try to record its stack.
|
// Visit each leaked goroutine and try to record its stack.
|
||||||
|
var offset int
|
||||||
forEachGRace(func(gp1 *g) {
|
forEachGRace(func(gp1 *g) {
|
||||||
if readgstatus(gp1) == _Gleaked {
|
if readgstatus(gp1)&^_Gscan == _Gleaked {
|
||||||
doRecordGoroutineProfile(gp1, pcbuf)
|
systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &p[offset], pcbuf) })
|
||||||
|
if labels != nil {
|
||||||
|
labels[offset] = gp1.labels
|
||||||
|
}
|
||||||
|
offset++
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
@ -1354,7 +1350,6 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab
|
||||||
raceacquire(unsafe.Pointer(&labelSync))
|
raceacquire(unsafe.Pointer(&labelSync))
|
||||||
}
|
}
|
||||||
|
|
||||||
semrelease(&goroutineProfile.sema)
|
|
||||||
return n, true
|
return n, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -228,6 +228,31 @@ var mutexProfile = &Profile{
|
||||||
write: writeMutex,
|
write: writeMutex,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// goroutineLeakProfileLock ensures that the goroutine leak profile writer observes the
|
||||||
|
// leaked goroutines discovered during the goroutine leak detection GC cycle
|
||||||
|
// that was triggered by the profile request.
|
||||||
|
// This prevents a race condition between the garbage collector and the profile writer
|
||||||
|
// when multiple profile requests are issued concurrently: the status of leaked goroutines
|
||||||
|
// is reset to _Gwaiting at the beginning of a leak detection cycle, which may lead the
|
||||||
|
// profile writer of another concurrent request to produce an incomplete profile.
|
||||||
|
//
|
||||||
|
// Example trace:
|
||||||
|
//
|
||||||
|
// G1 | GC | G2
|
||||||
|
// ----------------------+-----------------------------+---------------------
|
||||||
|
// Request profile | . | .
|
||||||
|
// . | . | Request profile
|
||||||
|
// . | [G1] Resets leaked g status | .
|
||||||
|
// . | [G1] Leaks detected | .
|
||||||
|
// . | <New cycle> | .
|
||||||
|
// . | [G2] Resets leaked g status | .
|
||||||
|
// Write profile | . | .
|
||||||
|
// . | [G2] Leaks detected | .
|
||||||
|
// . | . | Write profile
|
||||||
|
// ----------------------+-----------------------------+---------------------
|
||||||
|
// Incomplete profile |+++++++++++++++++++++++++++++| Complete profile
|
||||||
|
var goroutineLeakProfileLock sync.Mutex
|
||||||
|
|
||||||
func lockProfiles() {
|
func lockProfiles() {
|
||||||
profiles.mu.Lock()
|
profiles.mu.Lock()
|
||||||
if profiles.m == nil {
|
if profiles.m == nil {
|
||||||
|
|
@ -763,6 +788,15 @@ func writeGoroutine(w io.Writer, debug int) error {
|
||||||
// writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection.
|
// writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection.
|
||||||
// It then writes the goroutine profile, filtering for leaked goroutines.
|
// It then writes the goroutine profile, filtering for leaked goroutines.
|
||||||
func writeGoroutineLeak(w io.Writer, debug int) error {
|
func writeGoroutineLeak(w io.Writer, debug int) error {
|
||||||
|
// Acquire the goroutine leak detection lock and release
|
||||||
|
// it after the goroutine leak profile is written.
|
||||||
|
//
|
||||||
|
// While the critical section is long, this is needed to prevent
|
||||||
|
// a race condition between the garbage collector and the goroutine
|
||||||
|
// leak profile writer when multiple profile requests are issued concurrently.
|
||||||
|
goroutineLeakProfileLock.Lock()
|
||||||
|
defer goroutineLeakProfileLock.Unlock()
|
||||||
|
|
||||||
// Run the GC with leak detection first so that leaked goroutines
|
// Run the GC with leak detection first so that leaked goroutines
|
||||||
// may transition to the leaked state.
|
// may transition to the leaked state.
|
||||||
runtime_goroutineLeakGC()
|
runtime_goroutineLeakGC()
|
||||||
|
|
|
||||||
|
|
@ -774,7 +774,11 @@ func TestMorestack(t *testing.T) {
|
||||||
for {
|
for {
|
||||||
go func() {
|
go func() {
|
||||||
growstack1()
|
growstack1()
|
||||||
c <- true
|
// NOTE(vsaioc): This goroutine may leak without this select.
|
||||||
|
select {
|
||||||
|
case c <- true:
|
||||||
|
case <-time.After(duration):
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
select {
|
select {
|
||||||
case <-t:
|
case <-t:
|
||||||
|
|
@ -1565,6 +1569,210 @@ func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[strin
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func goroutineLeakExample() {
|
||||||
|
<-make(chan struct{})
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGoroutineLeakProfileConcurrency(t *testing.T) {
|
||||||
|
const leakCount = 3
|
||||||
|
|
||||||
|
testenv.MustHaveParallelism(t)
|
||||||
|
regexLeakCount := regexp.MustCompile("goroutineleak profile: total ")
|
||||||
|
whiteSpace := regexp.MustCompile("\\s+")
|
||||||
|
|
||||||
|
// Regular goroutine profile. Used to check that there is no interference between
|
||||||
|
// the two profile types.
|
||||||
|
goroutineProf := Lookup("goroutine")
|
||||||
|
goroutineLeakProf := goroutineLeakProfile
|
||||||
|
|
||||||
|
// Check that a profile with debug information contains
|
||||||
|
includesLeak := func(t *testing.T, name, s string) {
|
||||||
|
if !strings.Contains(s, "runtime/pprof.goroutineLeakExample") {
|
||||||
|
t.Errorf("%s profile does not contain expected leaked goroutine (runtime/pprof.goroutineLeakExample): %s", name, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
checkFrame := func(i int, j int, locations []*profile.Location, expectedFunctionName string) {
|
||||||
|
if len(locations) <= i {
|
||||||
|
t.Errorf("leaked goroutine stack locations out of range at %d of %d", i+1, len(locations))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
location := locations[i]
|
||||||
|
if len(location.Line) <= j {
|
||||||
|
t.Errorf("leaked goroutine stack location lines out of range at %d of %d", j+1, len(location.Line))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if location.Line[j].Function.Name != expectedFunctionName {
|
||||||
|
t.Errorf("leaked goroutine stack expected %s as the location[%d].Line[%d] but found %s (%s:%d)", expectedFunctionName, i, j, location.Line[j].Function.Name, location.Line[j].Function.Filename, location.Line[j].Line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We use this helper to count the total number of leaked goroutines in the profile.
|
||||||
|
//
|
||||||
|
// NOTE(vsaioc): This value should match for the number of leaks produced in this test,
|
||||||
|
// but other tests could also leak goroutines, in which case we would have a mismatch
|
||||||
|
// when bulk-running tests.
|
||||||
|
//
|
||||||
|
// The two mismatching outcomes are therefore:
|
||||||
|
// - More leaks than expected, which is a correctness issue with other tests.
|
||||||
|
// In this case, this test effectively checks other tests wrt
|
||||||
|
// goroutine leaks during bulk executions (e.g., running all.bash).
|
||||||
|
//
|
||||||
|
// - Fewer leaks than expected; this is an unfortunate symptom of scheduling
|
||||||
|
// non-determinism, which may occur once in a blue moon. We make
|
||||||
|
// a best-effort attempt to allow the expected leaks to occur, by yielding
|
||||||
|
// the main thread, but it is never a guarantee.
|
||||||
|
countLeaks := func(t *testing.T, number int, s string) {
|
||||||
|
// Strip the profile header
|
||||||
|
parts := regexLeakCount.Split(s, -1)
|
||||||
|
if len(parts) < 2 {
|
||||||
|
t.Fatalf("goroutineleak profile does not contain 'goroutineleak profile: total ': %s\nparts: %v", s, parts)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
parts = whiteSpace.Split(parts[1], -1)
|
||||||
|
|
||||||
|
count, err := strconv.ParseInt(parts[0], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("goroutineleak profile count is not a number: %s\nerror: %v", s, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the total number of leaked goroutines is exactly the expected number.
|
||||||
|
if count != int64(number) {
|
||||||
|
t.Errorf("goroutineleak profile does not contain exactly %d leaked goroutines: %d", number, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
checkLeakStack := func(t *testing.T) func(pc uintptr, locations []*profile.Location, _ map[string][]string) {
|
||||||
|
return func(pc uintptr, locations []*profile.Location, _ map[string][]string) {
|
||||||
|
if pc != leakCount {
|
||||||
|
t.Errorf("expected %d leaked goroutines with specific stack configurations, but found %d", leakCount, pc)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch len(locations) {
|
||||||
|
case 4:
|
||||||
|
// We expect a receive operation. This is the typical stack.
|
||||||
|
checkFrame(0, 0, locations, "runtime.gopark")
|
||||||
|
checkFrame(1, 0, locations, "runtime.chanrecv")
|
||||||
|
checkFrame(2, 0, locations, "runtime.chanrecv1")
|
||||||
|
switch len(locations[3].Line) {
|
||||||
|
case 2:
|
||||||
|
// Running `go func() { goroutineLeakExample() }()` will produce a stack with 2 lines.
|
||||||
|
// The anonymous function will have the call to goroutineLeakExample inlined.
|
||||||
|
checkFrame(3, 1, locations, "runtime/pprof.TestGoroutineLeakProfileConcurrency.func5")
|
||||||
|
fallthrough
|
||||||
|
case 1:
|
||||||
|
// Running `go goroutineLeakExample()` will produce a stack with 1 line.
|
||||||
|
checkFrame(3, 0, locations, "runtime/pprof.goroutineLeakExample")
|
||||||
|
default:
|
||||||
|
t.Errorf("leaked goroutine stack location expected 1 or 2 lines in the 4th location but found %d", len(locations[3].Line))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
message := fmt.Sprintf("leaked goroutine stack expected 4 or 5 locations but found %d", len(locations))
|
||||||
|
for _, location := range locations {
|
||||||
|
for _, line := range location.Line {
|
||||||
|
message += fmt.Sprintf("\n%s:%d", line.Function.Name, line.Line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Errorf("%s", message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Leak some goroutines that will feature in the goroutine leak profile
|
||||||
|
for i := 0; i < leakCount; i++ {
|
||||||
|
go goroutineLeakExample()
|
||||||
|
go func() {
|
||||||
|
// Leak another goroutine that will feature a slightly different stack.
|
||||||
|
// This includes the frame runtime/pprof.TestGoroutineLeakProfileConcurrency.func1.
|
||||||
|
goroutineLeakExample()
|
||||||
|
panic("unreachable")
|
||||||
|
}()
|
||||||
|
// Yield several times to allow the goroutines to leak.
|
||||||
|
runtime.Gosched()
|
||||||
|
runtime.Gosched()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Give all goroutines a chance to leak.
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
|
||||||
|
t.Run("profile contains leak", func(t *testing.T) {
|
||||||
|
var w strings.Builder
|
||||||
|
goroutineLeakProf.WriteTo(&w, 0)
|
||||||
|
parseProfile(t, []byte(w.String()), checkLeakStack(t))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("leak persists between sequential profiling runs", func(t *testing.T) {
|
||||||
|
for i := 0; i < 2; i++ {
|
||||||
|
var w strings.Builder
|
||||||
|
goroutineLeakProf.WriteTo(&w, 0)
|
||||||
|
parseProfile(t, []byte(w.String()), checkLeakStack(t))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Concurrent calls to the goroutine leak profiler should not trigger data races
|
||||||
|
// or corruption.
|
||||||
|
t.Run("overlapping profile requests", func(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < 2; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) {
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for ctx.Err() == nil {
|
||||||
|
var w strings.Builder
|
||||||
|
goroutineLeakProf.WriteTo(&w, 1)
|
||||||
|
countLeaks(t, 2*leakCount, w.String())
|
||||||
|
includesLeak(t, "goroutineleak", w.String())
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
})
|
||||||
|
|
||||||
|
// Concurrent calls to the goroutine leak profiler should not trigger data races
|
||||||
|
// or corruption, or interfere with regular goroutine profiles.
|
||||||
|
t.Run("overlapping goroutine and goroutine leak profile requests", func(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < 2; i++ {
|
||||||
|
wg.Add(2)
|
||||||
|
Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) {
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for ctx.Err() == nil {
|
||||||
|
var w strings.Builder
|
||||||
|
goroutineLeakProf.WriteTo(&w, 1)
|
||||||
|
countLeaks(t, 2*leakCount, w.String())
|
||||||
|
includesLeak(t, "goroutineleak", w.String())
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for ctx.Err() == nil {
|
||||||
|
var w strings.Builder
|
||||||
|
goroutineProf.WriteTo(&w, 1)
|
||||||
|
// The regular goroutine profile should see the leaked
|
||||||
|
// goroutines. We simply check that the goroutine leak
|
||||||
|
// profile does not corrupt the goroutine profile state.
|
||||||
|
includesLeak(t, "goroutine", w.String())
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestGoroutineProfileConcurrency(t *testing.T) {
|
func TestGoroutineProfileConcurrency(t *testing.T) {
|
||||||
testenv.MustHaveParallelism(t)
|
testenv.MustHaveParallelism(t)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue