mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: use bit-parallel operations to compute heap bit summaries
The new implementation is much faster in all cases.

name                                             old time/op  new time/op  delta
PallocBitsSummarize/Unpacked00-16                142ns ± 1%   7ns ± 2%     -94.75%  (p=0.000 n=10+9)
PallocBitsSummarize/UnpackedFFFFFFFFFFFFFFFF-16  172ns ± 0%   24ns ± 0%    -86.02%  (p=0.000 n=9+9)
PallocBitsSummarize/UnpackedAA-16                145ns ± 0%   32ns ± 0%    -78.16%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedAAAAAAAAAAAAAAAA-16  172ns ± 0%   33ns ± 0%    -80.95%  (p=0.000 n=9+9)
PallocBitsSummarize/Unpacked80000000AAAAAAAA-16  162ns ± 1%   60ns ± 0%    -62.69%  (p=0.000 n=10+9)
PallocBitsSummarize/UnpackedAAAAAAAA00000001-16  163ns ± 0%   68ns ± 1%    -58.47%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedBBBBBBBBBBBBBBBB-16  172ns ± 0%   35ns ± 0%    -79.70%  (p=0.000 n=9+9)
PallocBitsSummarize/Unpacked80000000BBBBBBBB-16  161ns ± 0%   63ns ± 0%    -60.61%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedBBBBBBBB00000001-16  163ns ± 0%   60ns ± 0%    -63.14%  (p=0.000 n=9+10)
PallocBitsSummarize/UnpackedCCCCCCCCCCCCCCCC-16  172ns ± 0%   39ns ± 0%    -77.41%  (p=0.000 n=7+10)
PallocBitsSummarize/Unpacked4444444444444444-16  172ns ± 0%   39ns ± 0%    -77.42%  (p=0.000 n=7+10)
PallocBitsSummarize/Unpacked4040404040404040-16  173ns ± 2%   51ns ± 1%    -70.55%  (p=0.000 n=10+10)
PallocBitsSummarize/Unpacked4000400040004000-16  160ns ± 1%   53ns ± 0%    -66.78%  (p=0.000 n=10+10)
PallocBitsSummarize/Unpacked1000404044CCAAFF-16  169ns ± 1%   59ns ± 1%    -65.28%  (p=0.000 n=10+10)

Change-Id: I94daa645b76a9cf9c93edeb2058d7132216fcb72
Reviewed-on: https://go-review.googlesource.com/c/go/+/240900
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
parent
88c094c96a
commit
4e5ed83e8d
2 changed files with 107 additions and 74 deletions
|
|
@@ -101,7 +101,7 @@ func invertPallocBits(b *PallocBits) {
|
|||
|
||||
// Ensures two packed summaries are identical, and reports a detailed description
|
||||
// of the difference if they're not.
|
||||
func checkPallocSum(t *testing.T, got, want PallocSum) {
|
||||
func checkPallocSum(t testing.TB, got, want PallocSum) {
|
||||
if got.Start() != want.Start() {
|
||||
t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start())
|
||||
}
|
||||
|
|
@@ -297,17 +297,29 @@ func TestPallocBitsSummarize(t *testing.T) {
|
|||
|
||||
// Benchmarks how quickly we can summarize a PallocBits.
|
||||
func BenchmarkPallocBitsSummarize(b *testing.B) {
|
||||
buf0 := new(PallocBits)
|
||||
buf1 := new(PallocBits)
|
||||
for i := 0; i < len(buf1); i++ {
|
||||
buf1[i] = ^uint64(0)
|
||||
patterns := []uint64{
|
||||
0,
|
||||
^uint64(0),
|
||||
0xaa,
|
||||
0xaaaaaaaaaaaaaaaa,
|
||||
0x80000000aaaaaaaa,
|
||||
0xaaaaaaaa00000001,
|
||||
0xbbbbbbbbbbbbbbbb,
|
||||
0x80000000bbbbbbbb,
|
||||
0xbbbbbbbb00000001,
|
||||
0xcccccccccccccccc,
|
||||
0x4444444444444444,
|
||||
0x4040404040404040,
|
||||
0x4000400040004000,
|
||||
0x1000404044ccaaff,
|
||||
}
|
||||
bufa := new(PallocBits)
|
||||
for i := 0; i < len(bufa); i++ {
|
||||
bufa[i] = 0xaa
|
||||
}
|
||||
for _, buf := range []*PallocBits{buf0, buf1, bufa} {
|
||||
b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) {
|
||||
for _, p := range patterns {
|
||||
buf := new(PallocBits)
|
||||
for i := 0; i < len(buf); i++ {
|
||||
buf[i] = p
|
||||
}
|
||||
b.Run(fmt.Sprintf("Unpacked%02X", p), func(b *testing.B) {
|
||||
checkPallocSum(b, buf.Summarize(), SummarizeSlow(buf))
|
||||
for i := 0; i < b.N; i++ {
|
||||
buf.Summarize()
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue