mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: hoist invariant code out of heapBitsSmallForAddrInline
The first two instructions in heapBitsSmallForAddrInline are invariant for a given span and object size, and the function is called in a loop within scanObjectsSmall, which figures as a hot routine in profiles of some benchmark runs within the sweet benchmark suite (x/benchmarks/sweet). Ideally, the compiler would have hoisted this code out of the loop. Moving it out of the inner loop manually gives gains (moving it entirely out of the nested loop does not improve performance; in some cases it even regresses it, perhaps due to the early loop exit). Tested on AMD64, ARM64, PPC64LE and S390x. Fixes #76212 Change-Id: I49c3c826b9d7bf3125ffc42c8c174cce0ecc4cbf Reviewed-on: https://go-review.googlesource.com/c/go/+/718680 Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
594129b80c
commit
65c09eafdf
1 changed file with 7 additions and 6 deletions
|
|
@ -978,7 +978,9 @@ func spanSetScans(spanBase uintptr, nelems uint16, imb *spanInlineMarkBits, toSc
|
|||
}
|
||||
|
||||
func scanObjectSmall(spanBase, b, objSize uintptr, gcw *gcWork) {
|
||||
ptrBits := heapBitsSmallForAddrInline(spanBase, b, objSize)
|
||||
hbitsBase, _ := spanHeapBitsRange(spanBase, gc.PageSize, objSize)
|
||||
hbits := (*byte)(unsafe.Pointer(hbitsBase))
|
||||
ptrBits := extractHeapBitsSmall(hbits, spanBase, b, objSize)
|
||||
gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)
|
||||
nptrs := 0
|
||||
n := sys.OnesCount64(uint64(ptrBits))
|
||||
|
|
@ -1017,12 +1019,14 @@ func scanObjectsSmall(base, objSize uintptr, elems uint16, gcw *gcWork, scans *g
|
|||
break
|
||||
}
|
||||
n := sys.OnesCount64(uint64(bits))
|
||||
hbitsBase, _ := spanHeapBitsRange(base, gc.PageSize, objSize)
|
||||
hbits := (*byte)(unsafe.Pointer(hbitsBase))
|
||||
for range n {
|
||||
j := sys.TrailingZeros64(uint64(bits))
|
||||
bits &^= 1 << j
|
||||
|
||||
b := base + uintptr(i*(goarch.PtrSize*8)+j)*objSize
|
||||
ptrBits := heapBitsSmallForAddrInline(base, b, objSize)
|
||||
ptrBits := extractHeapBitsSmall(hbits, base, b, objSize)
|
||||
gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)
|
||||
|
||||
n := sys.OnesCount64(uint64(ptrBits))
|
||||
|
|
@ -1056,10 +1060,7 @@ func scanObjectsSmall(base, objSize uintptr, elems uint16, gcw *gcWork, scans *g
|
|||
}
|
||||
}
|
||||
|
||||
func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr {
|
||||
hbitsBase, _ := spanHeapBitsRange(spanBase, gc.PageSize, elemsize)
|
||||
hbits := (*byte)(unsafe.Pointer(hbitsBase))
|
||||
|
||||
func extractHeapBitsSmall(hbits *byte, spanBase, addr, elemsize uintptr) uintptr {
|
||||
// These objects are always small enough that their bitmaps
|
||||
// fit in a single word, so just load the word or two we need.
|
||||
//
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue