runtime/pprof: remove hard-coded sleep in CPU profile reader

The CPU profiler reader goroutine has a hard-coded 100ms sleep between
reads of the CPU profile sample buffer. This is done because waking up
the CPU profile reader is not signal-safe on some platforms. As a
consequence, stopping the profiler takes 200ms (one iteration to read
the last samples and one to see the "eof"), and on many-core systems
the reader does not wake up frequently enough to keep up with incoming
data.

This CL removes the sleep where it is safe to do so, following a
suggestion by Austin Clements in the comments on CL 445375. We let the
reader fully block, and wake up the reader when the buffer is over
half-full.

Fixes #63043
Updates #56029

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-arm64-longtest,gotip-linux-386-longtest
Change-Id: I9f7e7e9918a4a6f16e80f6aaf33103126568a81f
Reviewed-on: https://go-review.googlesource.com/c/go/+/610815
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
commit 121bc3e464
parent b604148c4e

3 changed files with 103 additions and 3 deletions
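Before the diffs, here is a rough standalone sketch (not the runtime's code) of the mechanism the commit message describes: the old scheme polled the sample buffer on a fixed 100ms timer, while the new one lets the reader block and has the writer wake it once the buffer is at least half full or closed. The names below (wordBuf, newWordBuf, notify) are invented for the illustration, and a one-slot buffered channel stands in for the runtime's notesleep/notewakeup pair.

package main

import (
	"fmt"
	"sync"
)

type wordBuf struct {
	mu     sync.Mutex
	data   []uint64
	limit  int           // capacity in words; wake threshold is limit/2
	closed bool
	wakeup chan struct{} // size 1: at most one pending wakeup
}

func newWordBuf(limit int) *wordBuf {
	return &wordBuf{limit: limit, wakeup: make(chan struct{}, 1)}
}

func (b *wordBuf) notify() {
	select {
	case b.wakeup <- struct{}{}:
	default: // a wakeup is already pending
	}
}

// write appends words and wakes a blocked reader only past the half-full mark.
// (This sketch ignores the buffer-full/overflow case handled by the real profBuf.)
func (b *wordBuf) write(words []uint64) {
	b.mu.Lock()
	b.data = append(b.data, words...)
	wake := len(b.data) >= b.limit/2
	b.mu.Unlock()
	if wake {
		b.notify()
	}
}

func (b *wordBuf) close() {
	b.mu.Lock()
	b.closed = true
	b.mu.Unlock()
	b.notify() // always wake the reader for eof
}

// read blocks until there is data or eof; the old behavior instead slept
// 100ms between attempts, regardless of how much data had accumulated.
func (b *wordBuf) read() ([]uint64, bool) {
	for {
		b.mu.Lock()
		out := b.data
		b.data = nil
		eof := b.closed
		b.mu.Unlock()
		if len(out) > 0 || eof {
			return out, eof
		}
		<-b.wakeup
	}
}

func main() {
	b := newWordBuf(16)
	done := make(chan struct{})
	go func() {
		defer close(done)
		for {
			words, eof := b.read()
			fmt.Println("reader got", len(words), "words")
			if eof {
				return
			}
		}
	}()
	for i := 0; i < 4; i++ {
		b.write([]uint64{1, 2, 3, 4}) // 4 words per write; wake at >= 8
	}
	b.close()
	<-done
}

The one-slot channel gives the sketch the same lost-wakeup protection the runtime gets from re-checking the sleeping flag under a CAS: a wakeup issued before the reader blocks is remembered rather than dropped.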
src/runtime/pprof/pprof.go

@@ -924,7 +924,10 @@ func profileWriter(w io.Writer) {
 	b := newProfileBuilder(w)
 	var err error
 	for {
-		time.Sleep(100 * time.Millisecond)
+		if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
+			// see runtime_pprof_readProfile
+			time.Sleep(100 * time.Millisecond)
+		}
 		data, tags, eof := readProfile()
 		if e := b.addCPUData(data, tags); e != nil && err == nil {
 			err = e
src/runtime/profbuf.go

@@ -38,6 +38,10 @@ import (
 // be returned by read. By definition, r ≤ rNext ≤ w (before wraparound),
 // and rNext is only used by the reader, so it can be accessed without atomics.
 //
+// If the reader is blocked waiting for more data, the writer will wake it up if
+// either the buffer is more than half full, or when the writer sets the eof
+// marker or writes overflow entries (described below.)
+//
 // If the writer gets ahead of the reader, so that the buffer fills,
 // future writes are discarded and replaced in the output stream by an
 // overflow entry, which has size 2+hdrsize+1, time set to the time of
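Restating the wakeup rule documented in the new comment as a small pure function may help; this is a paraphrase for illustration only (shouldWakeReader and its parameters are invented names, and the runtime folds the check into write rather than using a helper):

package main

import "fmt"

// shouldWakeReader restates the documented rule: a blocked reader is woken
// when the buffer is at least half full, or when the writer sets the eof
// marker or records an overflow entry.
func shouldWakeReader(unreadWords, bufWords int, eof, overflow bool) bool {
	return eof || overflow || unreadWords >= bufWords/2
}

func main() {
	fmt.Println(shouldWakeReader(3, 16, false, false)) // false: under half full
	fmt.Println(shouldWakeReader(8, 16, false, false)) // true: half full
	fmt.Println(shouldWakeReader(0, 16, true, false))  // true: eof always wakes
}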
@@ -378,11 +382,28 @@ func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []u
 		// Racing with reader setting flag bits in b.w, to avoid lost wakeups.
 		old := b.w.load()
 		new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1)
+		// We re-load b.r here to reduce the likelihood of early wakeups
+		// if the reader already consumed some data between the last
+		// time we read b.r and now. This isn't strictly necessary.
+		unread := countSub(new.dataCount(), b.r.load().dataCount())
+		if unread < 0 {
+			// The new count overflowed and wrapped around.
+			unread += len(b.data)
+		}
+		wakeupThreshold := len(b.data) / 2
+		if unread < wakeupThreshold {
+			// Carry over the sleeping flag since we're not planning
+			// to wake the reader yet
+			new |= old & profReaderSleeping
+		}
 		if !b.w.cas(old, new) {
 			continue
 		}
-		// If there was a reader, wake it up.
-		if old&profReaderSleeping != 0 {
+		// If we've hit our high watermark for data in the buffer,
+		// and there is a reader, wake it up.
+		if unread >= wakeupThreshold && old&profReaderSleeping != 0 {
+			// NB: if we reach this point, then the sleeping bit is
+			// cleared in the new b.w value
 			notewakeup(&b.wait)
 		}
 		break
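The unread calculation above subtracts two wrapping counters, so the raw difference can come out negative right after a wraparound, and adding the buffer length restores the true count before it is compared against the half-full threshold. The runtime's countSub and dataCount use their own counter encoding; the sketch below (unreadWords and its parameters are invented names) only shows the generic ring-buffer shape of that correction:

package main

import "fmt"

// unreadWords returns how many words sit between a reader position and a
// writer position that both wrap at bufWords. It mirrors the shape of the
// correction in profBuf.write above, not its exact counter encoding.
func unreadWords(writeCount, readCount, bufWords int) int {
	unread := writeCount - readCount
	if unread < 0 {
		unread += bufWords // the subtraction wrapped around
	}
	return unread
}

func main() {
	const bufWords = 512
	// The writer has wrapped past the end of the buffer; the reader has not yet.
	unread := unreadWords(40, 460, bufWords)
	fmt.Println(unread)               // 92 words outstanding
	fmt.Println(unread >= bufWords/2) // false: don't wake the reader yet
}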
src/runtime/profbuf_test.go

@@ -5,8 +5,12 @@
 package runtime_test
 
 import (
+	"fmt"
+	"regexp"
+	"runtime"
 	. "runtime"
 	"slices"
+	"sync"
 	"testing"
 	"time"
 	"unsafe"
@@ -190,3 +194,75 @@ func TestProfBufDoubleWakeup(t *testing.T) {
 		}
 	}
 }
+
+func TestProfBufWakeup(t *testing.T) {
+	b := NewProfBuf(2, 16, 2)
+	var wg sync.WaitGroup
+	wg.Go(func() {
+		read := 0
+		for {
+			rdata, _, eof := b.Read(ProfBufBlocking)
+			if read == 0 && len(rdata) < 8 {
+				t.Errorf("first wake up at less than half full, got %x", rdata)
+			}
+			read += len(rdata)
+			if eof {
+				return
+			}
+		}
+	})
+
+	// Under the hood profBuf uses notetsleepg when the reader blocks.
+	// Different platforms have different implementations, leading to
+	// different statuses we need to look for to determine whether the
+	// reader is blocked.
+	var waitStatus string
+	switch runtime.GOOS {
+	case "js":
+		waitStatus = "waiting"
+	case "wasip1":
+		waitStatus = "runnable"
+	default:
+		waitStatus = "syscall"
+	}
+
+	// Ensure that the reader is blocked
+	awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+	// NB: this writes 6 words not 4. Fine for the test.
+	// The reader shouldn't wake up for this
+	b.Write(nil, 1, []uint64{1, 2}, []uintptr{3, 4})
+
+	// The reader should still be blocked
+	//
+	// TODO(nick): this is racy. We could Gosched and still have the reader
+	// blocked in a buggy implementation because it just didn't get a chance
+	// to run
+	awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+	b.Write(nil, 1, []uint64{5, 6}, []uintptr{7, 8})
+	b.Close()
+
+	// Wait here so we can be sure the reader got the data
+	wg.Wait()
+}
+
+// see also runtime/pprof tests
+func awaitBlockedGoroutine(state, fName string) {
+	re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`, regexp.QuoteMeta(state), fName)
+	r := regexp.MustCompile(re)
+
+	buf := make([]byte, 64<<10)
+	for {
+		Gosched()
+		n := Stack(buf, true)
+		if n == len(buf) {
+			// Buffer wasn't large enough for a full goroutine dump.
+			// Resize it and try again.
+			buf = make([]byte, 2*len(buf))
+			continue
+		}
+		const count = 1
+		if len(r.FindAll(buf[:n], -1)) >= count {
+			return
+		}
+	}
+}
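To make the awaitBlockedGoroutine regexp concrete, the snippet below runs the same pattern against an invented goroutine-dump fragment shaped like runtime.Stack output; the frame names, file paths, line numbers, and offsets are made up for the example and real dumps contain more frames.

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// An invented fragment in the shape runtime.Stack produces: a
	// "goroutine N [status]:" header followed by function/location line pairs.
	dump := "goroutine 7 [syscall]:\n" +
		"runtime.notetsleepg(0xc000010000, 0xffffffffffffffff)\n" +
		"\t/go/src/runtime/lock_sema.go:296 +0x31\n" +
		"runtime_test.TestProfBufWakeup.func1()\n" +
		"\t/go/src/runtime/profbuf_test.go:210 +0x45\n"

	state, fName := "syscall", "TestProfBufWakeup.func1"
	re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`,
		regexp.QuoteMeta(state), fName)
	fmt.Println(regexp.MustCompile(re).MatchString(dump)) // true
}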