runtime/pprof: remove hard-coded sleep in CPU profile reader

The CPU profiler's reader goroutine has a hard-coded 100ms sleep between
reads of the CPU profile sample buffer. The sleep is there because waking
up the CPU profile reader is not signal-safe on some platforms. As a
consequence, stopping the profiler takes about 200ms (one iteration to
read the last samples and one to see the "eof" marker), and on many-core
systems the reader does not wake up frequently enough to keep up with
incoming data.
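
The stop latency is easy to observe from the public API. A rough,
self-contained check (not part of this CL; io.Discard stands in for a
real profile destination):

	package main

	import (
		"fmt"
		"io"
		"runtime/pprof"
		"time"
	)

	func main() {
		if err := pprof.StartCPUProfile(io.Discard); err != nil {
			panic(err)
		}
		time.Sleep(250 * time.Millisecond) // profile some "work"
		start := time.Now()
		pprof.StopCPUProfile()
		// Before this change, this typically prints roughly 200ms.
		fmt.Println("StopCPUProfile took", time.Since(start))
	}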

This CL removes the sleep where it is safe to do so, following a
suggestion by Austin Clements in the comments on CL 445375. The reader
now blocks until there is enough data to read, and the writer wakes it
when the buffer is over half full.
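
The idea, modeled outside the runtime: the writer only signals a blocked
reader once the unread data crosses a high-water mark (half the buffer),
or at eof. A minimal sketch using sync.Cond (illustrative only; profBuf
itself uses lock-free packed counters and notewakeup, as in the diff
below):

	package main

	import (
		"fmt"
		"sync"
	)

	type buf struct {
		mu     sync.Mutex
		cond   *sync.Cond
		data   []uint64
		cap    int
		closed bool
	}

	func (b *buf) write(v uint64) {
		b.mu.Lock()
		defer b.mu.Unlock()
		if len(b.data) < b.cap {
			b.data = append(b.data, v)
		} // a real profBuf records an overflow entry when full
		if len(b.data) >= b.cap/2 {
			b.cond.Signal() // past the high-water mark: wake the reader
		}
	}

	func (b *buf) close() {
		b.mu.Lock()
		b.closed = true
		b.cond.Signal() // eof always wakes the reader
		b.mu.Unlock()
	}

	func (b *buf) read() (vals []uint64, eof bool) {
		b.mu.Lock()
		defer b.mu.Unlock()
		for len(b.data) < b.cap/2 && !b.closed {
			b.cond.Wait() // block instead of polling on a timer
		}
		vals, b.data = b.data, nil
		return vals, b.closed
	}

	func main() {
		b := &buf{cap: 16}
		b.cond = sync.NewCond(&b.mu)
		done := make(chan struct{})
		go func() {
			defer close(done)
			for {
				vals, eof := b.read()
				fmt.Println("read", len(vals), "words")
				if eof {
					return
				}
			}
		}()
		for i := uint64(0); i < 20; i++ {
			b.write(i)
		}
		b.close()
		<-done
	}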

Fixes #63043
Updates #56029

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-arm64-longtest,gotip-linux-386-longtest
Change-Id: I9f7e7e9918a4a6f16e80f6aaf33103126568a81f
Reviewed-on: https://go-review.googlesource.com/c/go/+/610815
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
Nick Ripley 2025-11-21 16:01:29 +00:00
parent b604148c4e
commit 121bc3e464
3 changed files with 103 additions and 3 deletions


@@ -924,7 +924,10 @@ func profileWriter(w io.Writer) {
 	b := newProfileBuilder(w)
 	var err error
 	for {
-		time.Sleep(100 * time.Millisecond)
+		if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
+			// see runtime_pprof_readProfile
+			time.Sleep(100 * time.Millisecond)
+		}
 		data, tags, eof := readProfile()
 		if e := b.addCPUData(data, tags); e != nil && err == nil {
 			err = e


@@ -38,6 +38,10 @@ import (
 // be returned by read. By definition, r ≤ rNext ≤ w (before wraparound),
 // and rNext is only used by the reader, so it can be accessed without atomics.
 //
+// If the reader is blocked waiting for more data, the writer will wake it up
+// when the buffer becomes more than half full, when it sets the eof marker,
+// or when it writes an overflow entry (described below).
+//
 // If the writer gets ahead of the reader, so that the buffer fills,
 // future writes are discarded and replaced in the output stream by an
 // overflow entry, which has size 2+hdrsize+1, time set to the time of
@@ -378,11 +382,28 @@ func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []u
 		// Racing with reader setting flag bits in b.w, to avoid lost wakeups.
 		old := b.w.load()
 		new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1)
+		// We re-load b.r here to reduce the likelihood of early wakeups
+		// if the reader already consumed some data between the last
+		// time we read b.r and now. This isn't strictly necessary.
+		unread := countSub(new.dataCount(), b.r.load().dataCount())
+		if unread < 0 {
+			// The new count overflowed and wrapped around.
+			unread += len(b.data)
+		}
+		wakeupThreshold := len(b.data) / 2
+		if unread < wakeupThreshold {
+			// Carry over the sleeping flag since we're not planning
+			// to wake the reader yet
+			new |= old & profReaderSleeping
+		}
 		if !b.w.cas(old, new) {
 			continue
 		}
-		// If there was a reader, wake it up.
-		if old&profReaderSleeping != 0 {
+		// If we've hit our high watermark for data in the buffer,
+		// and there is a reader, wake it up.
+		if unread >= wakeupThreshold && old&profReaderSleeping != 0 {
+			// NB: if we reach this point, then the sleeping bit is
+			// cleared in the new b.w value
 			notewakeup(&b.wait)
 		}
 		break
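
The unread computation above has to tolerate the packed counters wrapping
around. A tiny standalone illustration with made-up counter values (not the
runtime's real arithmetic; countSub is modeled as plain subtraction, just to
show the shape of the negative fix-up and the half-full comparison):

	package main

	import "fmt"

	func main() {
		const bufWords = 16
		// The delta can come out "negative" once the write count
		// wraps past the read count; adding the buffer size fixes it up.
		writeCount, readCount := 5, 13
		unread := writeCount - readCount
		if unread < 0 {
			unread += bufWords
		}
		wakeupThreshold := bufWords / 2
		fmt.Printf("unread=%d threshold=%d wake=%v\n",
			unread, wakeupThreshold, unread >= wakeupThreshold)
		// Output: unread=8 threshold=8 wake=true
	}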


@@ -5,8 +5,12 @@
 package runtime_test
 
 import (
+	"fmt"
+	"regexp"
+	"runtime"
 	. "runtime"
 	"slices"
+	"sync"
 	"testing"
 	"time"
 	"unsafe"
@@ -190,3 +194,75 @@ func TestProfBufDoubleWakeup(t *testing.T) {
 		}
 	}
 }
+
+func TestProfBufWakeup(t *testing.T) {
+	b := NewProfBuf(2, 16, 2)
+	var wg sync.WaitGroup
+	wg.Go(func() {
+		read := 0
+		for {
+			rdata, _, eof := b.Read(ProfBufBlocking)
+			if read == 0 && len(rdata) < 8 {
+				t.Errorf("first wake up at less than half full, got %x", rdata)
+			}
+			read += len(rdata)
+			if eof {
+				return
+			}
+		}
+	})
+
+	// Under the hood profBuf uses notetsleepg when the reader blocks.
+	// Different platforms have different implementations, leading to
+	// different statuses we need to look for to determine whether the
+	// reader is blocked.
+	var waitStatus string
+	switch runtime.GOOS {
+	case "js":
+		waitStatus = "waiting"
+	case "wasip1":
+		waitStatus = "runnable"
+	default:
+		waitStatus = "syscall"
+	}
+
+	// Ensure that the reader is blocked
+	awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+
+	// NB: this writes 6 words not 4. Fine for the test.
+	// The reader shouldn't wake up for this
+	b.Write(nil, 1, []uint64{1, 2}, []uintptr{3, 4})
+
+	// The reader should still be blocked
+	//
+	// TODO(nick): this is racy. We could Gosched and still have the reader
+	// blocked in a buggy implementation because it just didn't get a chance
+	// to run
+	awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+	b.Write(nil, 1, []uint64{5, 6}, []uintptr{7, 8})
+	b.Close()
+
+	// Wait here so we can be sure the reader got the data
+	wg.Wait()
+}
+
+// see also runtime/pprof tests
+func awaitBlockedGoroutine(state, fName string) {
+	re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`, regexp.QuoteMeta(state), fName)
+	r := regexp.MustCompile(re)
+	buf := make([]byte, 64<<10)
+	for {
+		Gosched()
+		n := Stack(buf, true)
+		if n == len(buf) {
+			// Buffer wasn't large enough for a full goroutine dump.
+			// Resize it and try again.
+			buf = make([]byte, 2*len(buf))
+			continue
+		}
+		const count = 1
+		if len(r.FindAll(buf[:n], -1)) >= count {
+			return
+		}
+	}
+}
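
For reference, awaitBlockedGoroutine only pattern-matches runtime.Stack
output. A standalone sketch with a hand-written, hypothetical dump snippet
(the frame names, paths, and addresses are made up) showing what the regexp
accepts:

	package main

	import (
		"fmt"
		"regexp"
	)

	// dump imitates one goroutine record in the runtime.Stack format:
	// a "goroutine N [status]:" header followed by frame/location pairs.
	const dump = "goroutine 7 [syscall]:\n" +
		"runtime.notetsleepg(0x1234, 0xff)\n" +
		"\t/usr/local/go/src/runtime/lock_futex.go:236 +0x30\n" +
		"runtime_test.TestProfBufWakeup.func1()\n" +
		"\t/usr/local/go/src/runtime/profbuf_test.go:200 +0x40\n"

	func main() {
		re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`,
			regexp.QuoteMeta("syscall"), "TestProfBufWakeup.func1")
		fmt.Println(regexp.MustCompile(re).MatchString(dump)) // true
	}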