runtime: make alloc count metrics truly monotonic

Right now we export alloc count metrics via the runtime/metrics package
and mark them as monotonic, but they aren't actually monotonic. As an
optimization, the runtime assumes a span is always fully allocated
before being uncached, and updates the accounting as such. In the rare
case that assumption is wrong, the span has enough information to back
out what did not get allocated.
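
For illustration, a minimal sketch of that optimistic scheme. All names
here are hypothetical stand-ins for the runtime's internals, not the
actual mcache/mcentral code:

	// Package sketch illustrates the optimistic accounting described
	// above; it is not the runtime's actual code.
	package sketch

	type span struct {
		nelems     uint16 // total allocation slots in the span
		allocCount uint16 // slots currently allocated; grows while cached
	}

	// smallAllocCount stands in for the cumulative alloc count metric.
	var smallAllocCount uint64

	// cacheSpan optimistically counts every remaining slot as allocated
	// the moment the span is handed out to a cache.
	func cacheSpan(s *span) {
		smallAllocCount += uint64(s.nelems - s.allocCount)
	}

	// uncacheSpan backs out the slots that never got allocated. This
	// subtraction is what can make the "monotonic" metric decrease.
	func uncacheSpan(s *span) {
		smallAllocCount -= uint64(s.nelems - s.allocCount)
	}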

This change uses 16 bits of padding in the mspan to house another field
that records the number of mspan slots filled at the moment the mspan
is cached. This information is enough to get an exact count, allowing
us to make the metrics truly monotonic.
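
A matching sketch of the fix, under the same caveats; the snapshot
field name below just follows the description above and may not match
the real mspan field:

	package sketch

	type span struct {
		allocCount            uint16 // slots currently allocated
		allocCountBeforeCache uint16 // snapshot taken when the span is cached
	}

	// smallAllocCount stands in for the cumulative alloc count metric.
	var smallAllocCount uint64

	// cacheSpan records how many slots are already filled when the span
	// is handed out; the snapshot fits in 16 bits of mspan padding.
	func cacheSpan(s *span) {
		s.allocCountBeforeCache = s.allocCount
	}

	// uncacheSpan credits exactly the slots filled while the span was
	// cached, so the metric can only ever grow.
	func uncacheSpan(s *span) {
		smallAllocCount += uint64(s.allocCount - s.allocCountBeforeCache)
		s.allocCountBeforeCache = 0
	}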

Change-Id: Iaff3ca43f8745dc1bbb0232372423e014b89b920
Reviewed-on: https://go-review.googlesource.com/c/go/+/377516
Reviewed-by: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Author: Michael Anthony Knyszek
Date: 2022-01-10 22:59:26 +00:00
Committed-by: Michael Knyszek
Parent: a0f77e56b7
Commit: 79db59ded9
3 changed files with 125 additions and 45 deletions

src/runtime/metrics_test.go

@@ -9,6 +9,7 @@ import (
	"runtime/metrics"
	"sort"
	"strings"
	"sync"
	"testing"
	"time"
	"unsafe"
@@ -319,3 +320,88 @@ func BenchmarkReadMetricsLatency(b *testing.B) {
	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}

var readMetricsSink [1024]interface{}

func TestReadMetricsCumulative(t *testing.T) {
	// Set up the set of metrics marked cumulative.
	descs := metrics.All()
	var samples [2][]metrics.Sample
	samples[0] = make([]metrics.Sample, len(descs))
	samples[1] = make([]metrics.Sample, len(descs))
	total := 0
	for i := range samples[0] {
		if !descs[i].Cumulative {
			continue
		}
		samples[0][total].Name = descs[i].Name
		total++
	}
	samples[0] = samples[0][:total]
	samples[1] = samples[1][:total]
	copy(samples[1], samples[0])

	// Start some noise in the background.
	var wg sync.WaitGroup
	wg.Add(1)
	done := make(chan struct{})
	go func() {
		defer wg.Done()
		for {
			// Add more things here that could influence metrics.
			for i := 0; i < len(readMetricsSink); i++ {
				readMetricsSink[i] = make([]byte, 1024)
				select {
				case <-done:
					return
				default:
				}
			}
			runtime.GC()
		}
	}()

	sum := func(us []uint64) uint64 {
		total := uint64(0)
		for _, u := range us {
			total += u
		}
		return total
	}

	// Populate the first generation.
	metrics.Read(samples[0])

	// Check to make sure that these metrics only grow monotonically.
	for gen := 1; gen < 10; gen++ {
		metrics.Read(samples[gen%2])
		for i := range samples[gen%2] {
			name := samples[gen%2][i].Name
			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value

			switch vNew.Kind() {
			case metrics.KindUint64:
				new := vNew.Uint64()
				old := vOld.Uint64()
				if new < old {
					t.Errorf("%s decreased: %d < %d", name, new, old)
				}
			case metrics.KindFloat64:
				new := vNew.Float64()
				old := vOld.Float64()
				if new < old {
					t.Errorf("%s decreased: %f < %f", name, new, old)
				}
			case metrics.KindFloat64Histogram:
				new := sum(vNew.Float64Histogram().Counts)
				old := sum(vOld.Float64Histogram().Counts)
				if new < old {
					t.Errorf("%s counts decreased: %d < %d", name, new, old)
				}
			}
		}
	}
	close(done)

	wg.Wait()
}