go/src/internal/runtime/gc/scan/scan_test.go

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package scan_test

import (
"fmt"
"internal/cpu"
"internal/goarch"
"internal/runtime/gc"
"internal/runtime/gc/scan"
"math/bits"
"math/rand/v2"
"slices"
"sync"
"testing"
"unsafe"
)
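
// scanFunc is the signature shared by the packed span scan kernels exercised
// below (scan.ScanSpanPackedReference, scan.ScanSpanPackedGo, and the
// platform implementations).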
type scanFunc func(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32)
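
// testScanSpanPacked checks scanF against the reference implementation
// (scan.ScanSpanPackedReference) on a fake one-page heap with a random
// pointer mask, across the size classes and mark patterns generated by
// testObjs.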
func testScanSpanPacked(t *testing.T, scanF scanFunc) {
	scanR := scan.ScanSpanPackedReference
	// Construct a fake memory
	mem, free := makeMem(t, 1)
	defer free()
	for i := range mem {
		// Use values > gc.PageSize because a scan function may discard
		// pointers smaller than this.
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}
	// Construct a random pointer mask
	rnd := rand.New(rand.NewPCG(42, 42))
	var ptrs gc.PtrMask
	for i := range ptrs {
		ptrs[i] = uintptr(rnd.Uint64())
	}
	bufF := make([]uintptr, gc.PageWords)
	bufR := make([]uintptr, gc.PageWords)
	testObjs(t, func(t *testing.T, sizeClass int, objs *gc.ObjMask) {
		nF := scanF(unsafe.Pointer(&mem[0]), &bufF[0], objs, uintptr(sizeClass), &ptrs)
		nR := scanR(unsafe.Pointer(&mem[0]), &bufR[0], objs, uintptr(sizeClass), &ptrs)
		if nR != nF {
			t.Errorf("want %d count, got %d", nR, nF)
		} else if !slices.Equal(bufF[:nF], bufR[:nR]) {
			t.Errorf("want scanned pointers %d, got %d", bufR[:nR], bufF[:nF])
		}
	})
}
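
// testScanSpanPacked is meant to be driven by a per-implementation Test
// function. A minimal sketch of one (the name and wiring are illustrative
// assumptions, not part of this file), using the scan.ScanSpanPackedGo
// kernel that the benchmarks below already exercise, might read:
//
//	func TestScanSpanPackedGo(t *testing.T) {
//		testScanSpanPacked(t, scan.ScanSpanPackedGo)
//	}

// testObjs calls f for every size class whose pointer/scalar metadata is
// packed, sliding a small two-object mark pattern (0x101) across every
// object position to exercise boundary conditions.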
func testObjs(t *testing.T, f func(t *testing.T, sizeClass int, objMask *gc.ObjMask)) {
	for sizeClass := range gc.NumSizeClasses {
		if sizeClass == 0 {
			continue
		}
		size := uintptr(gc.SizeClassToSize[sizeClass])
		if size > gc.MinSizeForMallocHeader {
			break // Pointer/scalar metadata is not packed for larger sizes.
		}
		t.Run(fmt.Sprintf("size=%d", size), func(t *testing.T) {
			// Scan a few objects near i to test boundary conditions.
			const objMask = 0x101
			nObj := uintptr(gc.SizeClassToNPages[sizeClass]) * gc.PageSize / size
			for i := range nObj - uintptr(bits.Len(objMask)-1) {
				t.Run(fmt.Sprintf("objs=0x%x<<%d", objMask, i), func(t *testing.T) {
					var objs gc.ObjMask
					objs[i/goarch.PtrBits] = objMask << (i % goarch.PtrBits)
					f(t, sizeClass, &objs)
				})
			}
		})
	}
}
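
// dataCacheSizes returns the per-level data cache sizes once, printing them
// so benchmark output records how many Go pages fit in each level.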
var dataCacheSizes = sync.OnceValue(func() []uintptr {
	cs := cpu.DataCacheSizes()
	for i, c := range cs {
		fmt.Printf("# L%d cache: %d (%d Go pages)\n", i+1, c, c/gc.PageSize)
	}
	return cs
})
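
// BenchmarkScanSpanPacked compares the packed span scan kernels across
// cache-sized heaps and all packed size classes.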
func BenchmarkScanSpanPacked(b *testing.B) {
	benchmarkCacheSizes(b, benchmarkScanSpanPackedAllSizeClasses)
}
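
// benchmarkCacheSizes runs fn with a heap sized for each data cache level:
// one page for the tiny case, 3/4 of each cache level, and 3/2 of the last
// level to model a RAM-resident heap.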
func benchmarkCacheSizes(b *testing.B, fn func(b *testing.B, heapPages int)) {
	cacheSizes := dataCacheSizes()
	b.Run("cache=tiny/pages=1", func(b *testing.B) {
		fn(b, 1)
	})
	for i, cacheBytes := range cacheSizes {
		pages := int(cacheBytes*3/4) / gc.PageSize
		b.Run(fmt.Sprintf("cache=L%d/pages=%d", i+1, pages), func(b *testing.B) {
			fn(b, pages)
		})
	}
	if len(cacheSizes) == 0 {
		return
	}
	ramPages := int(cacheSizes[len(cacheSizes)-1]*3/2) / gc.PageSize
	b.Run(fmt.Sprintf("cache=ram/pages=%d", ramPages), func(b *testing.B) {
		fn(b, ramPages)
	})
}
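
// benchmarkScanSpanPackedAllSizeClasses runs the scan benchmark for every
// size class whose metadata is packed, using nPages of fake heap.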
func benchmarkScanSpanPackedAllSizeClasses(b *testing.B, nPages int) {
	for sc := range gc.NumSizeClasses {
		if sc == 0 {
			continue
		}
		size := gc.SizeClassToSize[sc]
		if size >= gc.MinSizeForMallocHeader {
			break
		}
		b.Run(fmt.Sprintf("sizeclass=%d", sc), func(b *testing.B) {
			benchmarkScanSpanPacked(b, nPages, sc)
		})
	}
}
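
// benchmarkScanSpanPacked measures each available scan implementation over
// nPages of fake heap, sweeping the fraction of marked objects from 0% to
// 100% in 11 steps.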
func benchmarkScanSpanPacked(b *testing.B, nPages int, sizeClass int) {
	rnd := rand.New(rand.NewPCG(42, 42))
	// Construct a fake memory
	mem, free := makeMem(b, nPages)
	defer free()
	for i := range mem {
		// Use values > gc.PageSize because a scan function may discard
		// pointers smaller than this.
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}
	// Construct a random pointer mask
	ptrs := make([]gc.PtrMask, nPages)
	for i := range ptrs {
		for j := range ptrs[i] {
			ptrs[i][j] = uintptr(rnd.Uint64())
		}
	}
	// Visit the pages in a random order
	pageOrder := rnd.Perm(nPages)
	// Create the scan buffer.
	buf := make([]uintptr, gc.PageWords)
	// Sweep from 0 marks to all marks. We'll use the same marks for each page
	// because I don't think that predictability matters.
	objBytes := uintptr(gc.SizeClassToSize[sizeClass])
	nObj := gc.PageSize / objBytes
	markOrder := rnd.Perm(int(nObj))
	const steps = 11
	for i := 0; i < steps; i++ {
		frac := float64(i) / float64(steps-1)
		// Set frac marks.
		nMarks := int(float64(len(markOrder))*frac + 0.5)
		var objMarks gc.ObjMask
		for _, mark := range markOrder[:nMarks] {
			objMarks[mark/goarch.PtrBits] |= 1 << (mark % goarch.PtrBits)
		}
		greyClusters := 0
		for page := range ptrs {
			greyClusters += countGreyClusters(sizeClass, &objMarks, &ptrs[page])
		}
		// Report MB/s of how much memory they're actually hitting. This assumes
		// 64 byte cache lines (TODO: Should it assume 128 byte cache lines?)
		// and expands each access to the whole cache line. This is useful for
		// comparing against memory bandwidth.
		//
		// TODO: Add a benchmark that just measures single core memory bandwidth
		// for comparison. (See runtime memcpy benchmarks.)
		//
		// TODO: Should there be a separate measure where we don't expand to
		// cache lines?
		avgBytes := int64(greyClusters) * int64(cpu.CacheLineSize) / int64(len(ptrs))
b.Run(fmt.Sprintf("pct=%d", int(100*frac)), func(b *testing.B) {
b.Run("impl=Reference", func(b *testing.B) {
b.SetBytes(avgBytes)
for i := range b.N {
page := pageOrder[i%len(pageOrder)]
scan.ScanSpanPackedReference(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
}
})
b.Run("impl=Go", func(b *testing.B) {
b.SetBytes(avgBytes)
for i := range b.N {
page := pageOrder[i%len(pageOrder)]
scan.ScanSpanPackedGo(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
}
})
if scan.HasFastScanSpanPacked() {
b.Run("impl=Platform", func(b *testing.B) {
b.SetBytes(avgBytes)
for i := range b.N {
page := pageOrder[i%len(pageOrder)]
scan.ScanSpanPacked(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
}
})
b.Run("impl=PlatformAsm", func(b *testing.B) {
b.SetBytes(avgBytes)
for i := range b.N {
page := pageOrder[i%len(pageOrder)]
scan.ScanSpanPackedAsm(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
}
})
}
})
}
}
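
// countGreyClusters returns the number of distinct clusters of adjacent heap
// words on a page that contain at least one pointer word of a marked object.
// It is used above to estimate how many cache lines a scan actually touches
// when computing avgBytes.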
func countGreyClusters(sizeClass int, objMarks *gc.ObjMask, ptrMask *gc.PtrMask) int {
	clusters := 0
	lastCluster := -1
	expandBy := uintptr(gc.SizeClassToSize[sizeClass]) / goarch.PtrSize
	for word := range gc.PageWords {
		objI := uintptr(word) / expandBy
		if objMarks[objI/goarch.PtrBits]&(1<<(objI%goarch.PtrBits)) == 0 {
			continue
		}
		if ptrMask[word/goarch.PtrBits]&(1<<(word%goarch.PtrBits)) == 0 {
			continue
		}
		c := word * 8 / goarch.PtrBits
		if c != lastCluster {
			lastCluster = c
			clusters++
		}
	}
	return clusters
}

func BenchmarkScanMaxBandwidth(b *testing.B) {
	// Measure the theoretical "maximum" bandwidth of scanning by reproducing
	// the memory access pattern of a full page scan, but using memcpy as the
	// kernel instead of scanning.
	benchmarkCacheSizes(b, func(b *testing.B, heapPages int) {
		mem, free := makeMem(b, heapPages)
		defer free()
		for i := range mem {
			mem[i] = uintptr(int(gc.PageSize) + i + 1)
		}
		buf := make([]uintptr, gc.PageWords)
		// Visit the pages in a random order
		rnd := rand.New(rand.NewPCG(42, 42))
		pageOrder := rnd.Perm(heapPages)
		b.SetBytes(int64(gc.PageSize))
		b.ResetTimer()
		for i := range b.N {
			page := pageOrder[i%len(pageOrder)]
			copy(buf, mem[gc.PageWords*page:])
		}
	})
}