runtime: use bit-parallel operations to compute heap bit summaries

The new implementation is much faster in all cases.

name                                             old time/op  new time/op  delta
PallocBitsSummarize/Unpacked00-16                 142ns ± 1%     7ns ± 2%  -94.75%  (p=0.000 n=10+9)
PallocBitsSummarize/UnpackedFFFFFFFFFFFFFFFF-16   172ns ± 0%    24ns ± 0%  -86.02%  (p=0.000 n=9+9)
PallocBitsSummarize/UnpackedAA-16                 145ns ± 0%    32ns ± 0%  -78.16%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedAAAAAAAAAAAAAAAA-16   172ns ± 0%    33ns ± 0%  -80.95%  (p=0.000 n=9+9)
PallocBitsSummarize/Unpacked80000000AAAAAAAA-16   162ns ± 1%    60ns ± 0%  -62.69%  (p=0.000 n=10+9)
PallocBitsSummarize/UnpackedAAAAAAAA00000001-16   163ns ± 0%    68ns ± 1%  -58.47%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedBBBBBBBBBBBBBBBB-16   172ns ± 0%    35ns ± 0%  -79.70%  (p=0.000 n=9+9)
PallocBitsSummarize/Unpacked80000000BBBBBBBB-16   161ns ± 0%    63ns ± 0%  -60.61%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedBBBBBBBB00000001-16   163ns ± 0%    60ns ± 0%  -63.14%  (p=0.000 n=9+10)
PallocBitsSummarize/UnpackedCCCCCCCCCCCCCCCC-16   172ns ± 0%    39ns ± 0%  -77.41%  (p=0.000 n=7+10)
PallocBitsSummarize/Unpacked4444444444444444-16   172ns ± 0%    39ns ± 0%  -77.42%  (p=0.000 n=7+10)
PallocBitsSummarize/Unpacked4040404040404040-16   173ns ± 2%    51ns ± 1%  -70.55%  (p=0.000 n=10+10)
PallocBitsSummarize/Unpacked4000400040004000-16   160ns ± 1%    53ns ± 0%  -66.78%  (p=0.000 n=10+10)
PallocBitsSummarize/Unpacked1000404044CCAAFF-16   169ns ± 1%    59ns ± 1%  -65.28%  (p=0.000 n=10+10)

Change-Id: I94daa645b76a9cf9c93edeb2058d7132216fcb72
Reviewed-on: https://go-review.googlesource.com/c/go/+/240900
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Keith Randall 2020-07-03 11:28:50 -07:00
parent 88c094c96a
commit 4e5ed83e8d
2 changed files with 107 additions and 74 deletions

View file

@@ -101,7 +101,7 @@ func invertPallocBits(b *PallocBits) {
// Ensures two packed summaries are identical, and reports a detailed description
// of the difference if they're not.
func checkPallocSum(t *testing.T, got, want PallocSum) {
func checkPallocSum(t testing.TB, got, want PallocSum) {
if got.Start() != want.Start() {
t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start())
}
@@ -297,17 +297,29 @@ func TestPallocBitsSummarize(t *testing.T) {
// Benchmarks how quickly we can summarize a PallocBits.
func BenchmarkPallocBitsSummarize(b *testing.B) {
buf0 := new(PallocBits)
buf1 := new(PallocBits)
for i := 0; i < len(buf1); i++ {
buf1[i] = ^uint64(0)
patterns := []uint64{
0,
^uint64(0),
0xaa,
0xaaaaaaaaaaaaaaaa,
0x80000000aaaaaaaa,
0xaaaaaaaa00000001,
0xbbbbbbbbbbbbbbbb,
0x80000000bbbbbbbb,
0xbbbbbbbb00000001,
0xcccccccccccccccc,
0x4444444444444444,
0x4040404040404040,
0x4000400040004000,
0x1000404044ccaaff,
}
bufa := new(PallocBits)
for i := 0; i < len(bufa); i++ {
bufa[i] = 0xaa
}
for _, buf := range []*PallocBits{buf0, buf1, bufa} {
b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) {
for _, p := range patterns {
buf := new(PallocBits)
for i := 0; i < len(buf); i++ {
buf[i] = p
}
b.Run(fmt.Sprintf("Unpacked%02X", p), func(b *testing.B) {
checkPallocSum(b, buf.Summarize(), SummarizeSlow(buf))
for i := 0; i < b.N; i++ {
buf.Summarize()
}