runtime: convert markallocated from C to Go
benchmark                      old ns/op     new ns/op     delta
BenchmarkMalloc8               28.7          22.4          -21.95%
BenchmarkMalloc16              44.8          33.8          -24.55%
BenchmarkMallocTypeInfo8       49.0          32.9          -32.86%
BenchmarkMallocTypeInfo16      46.7          35.8          -23.34%
BenchmarkMallocLargeStruct     907           901           -0.66%
BenchmarkGobDecode             13235542      12036851      -9.06%
BenchmarkGobEncode             10639699      9539155       -10.34%
BenchmarkJSONEncode            25193036      21898922      -13.08%
BenchmarkJSONDecode            96104044      89464904      -6.91%

Fixes #8452.

LGTM=khr
R=golang-codereviews, bradfitz, rsc, dave, khr
CC=golang-codereviews
https://golang.org/cl/122090043
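The numbers above come from allocation-heavy microbenchmarks in the runtime and standard-library test suites. As a point of reference only, a minimal sketch of an allocation benchmark in the same spirit as BenchmarkMalloc8; the package name, sink variable, and function name here are illustrative, not the runtime's actual test code:

package mallocbench_test

import "testing"

// sink keeps the allocation from being optimized away, so every iteration
// really goes through mallocgc (and therefore through markallocated).
var sink *[8]byte

// BenchmarkMalloc8Sketch is a stand-in for a BenchmarkMalloc8-style test:
// it heap-allocates one 8-byte object per iteration.
func BenchmarkMalloc8Sketch(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = new([8]byte)
	}
}

Running such a benchmark before and after the change (go test -run=NONE -bench=.) and comparing the two outputs with a tool like benchcmp produces old ns/op, new ns/op and delta columns like the table above.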
commit aac7f1a0d6
parent c5b2c370c6
5 changed files with 161 additions and 120 deletions
@@ -9,6 +9,8 @@ import (
 )
 
 const (
+	debugMalloc = false
+
 	flagNoScan      = 1 << 0 // GC doesn't have to scan object
 	flagNoProfiling = 1 << 1 // must not profile
 	flagNoZero      = 1 << 2 // don't zero memory
@@ -29,6 +31,22 @@ const (
 	pageShift = 13
 	pageSize  = 1 << pageShift
 	pageMask  = pageSize - 1
+
+	wordsPerBitmapWord = ptrSize * 8 / 4
+	gcBits             = 4
+	bitsPerPointer     = 2
+	bitsMask           = 1<<bitsPerPointer - 1
+	pointersPerByte    = 8 / bitsPerPointer
+	bitPtrMask         = bitsMask << 2
+	maxGCMask          = 0 // disabled because wastes several bytes of memory
+	bitsDead           = 0
+	bitsPointer        = 2
+
+	bitMiddle    = 0
+	bitBoundary  = 1
+	bitAllocated = 2
+	bitMarked    = 3
+	bitMask      = bitMiddle | bitBoundary | bitAllocated | bitMarked
 )
 
 // All zero-sized allocations return a pointer to this byte.
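The constants above describe the heap bitmap that markallocated fills in: gcBits (4) bits of metadata per heap word, packed into bitmap words that grow downward from arena_start. The low two bits of each entry hold the word's state (bitMiddle/bitBoundary/bitAllocated/bitMarked) and the next two, selected via bitPtrMask, hold its pointer info (bitsDead/bitsPointer). A standalone sketch of the address arithmetic the new gomallocgc code uses to locate an object's bitmap entry; the arena address and word index are made up, and ptrSize is assumed to be 8 (64-bit):

package main

import "fmt"

// Assumed 64-bit layout; these mirror the runtime constants above.
const (
	ptrSize            = 8
	gcBits             = 4
	wordsPerBitmapWord = ptrSize * 8 / 4 // 16 heap words described per bitmap word
)

func main() {
	// Hypothetical addresses, for illustration only.
	arenaStart := uintptr(0xc000000000)
	x := arenaStart + 5*ptrSize // pretend the object starts at the 6th heap word

	off := (x - arenaStart) / ptrSize                                   // heap word index: 5
	bitmapWord := arenaStart - off/wordsPerBitmapWord*ptrSize - ptrSize // bitmap word just below the arena
	shift := (off % wordsPerBitmapWord) * gcBits                        // bit offset inside that word: 20

	fmt.Printf("heap word %d -> bitmap word %#x, shift %d\n", off, bitmapWord, shift)
}

The same off/xbits/shift computation appears three times in the new gomallocgc code in the hunk below.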
@@ -168,14 +186,112 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
 		size = uintptr(s.elemsize)
 	}
 
-	// TODO: write markallocated in Go
-	mp.ptrarg[0] = x
-	mp.scalararg[0] = uint(size)
-	mp.scalararg[1] = uint(size0)
-	mp.ptrarg[1] = unsafe.Pointer(typ)
-	mp.scalararg[2] = uint(flags & flagNoScan)
-	onM(&markallocated_m)
-
+	// From here till marked label marking the object as allocated
+	// and storing type info in the GC bitmap.
+	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
+	off := (uintptr(x) - arena_start) / ptrSize
+	xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
+	shift := (off % wordsPerBitmapWord) * gcBits
+	if debugMalloc && (((*xbits)>>shift)&bitMask) != bitBoundary {
+		gothrow("bad bits in markallocated")
+	}
+
+	var ti, te uintptr
+	var ptrmask *uint8
+	if flags&flagNoScan != 0 {
+		// bitsDead in the first quadruple means don't scan.
+		if size == ptrSize {
+			*xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated + (bitsDead << 2)) << shift)
+		} else {
+			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+			*xbitsb = bitAllocated + (bitsDead << 2)
+		}
+		goto marked
+	}
+	if size == ptrSize {
+		// It's one word and it has pointers, it must be a pointer.
+		*xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated | (bitsPointer << 2)) << shift)
+		goto marked
+	}
+	if typ != nil && (uintptr(typ.gc[0])|uintptr(typ.gc[1])) != 0 && uintptr(typ.size) > ptrSize {
+		if typ.kind&kindGCProg != 0 {
+			nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
+			masksize := nptr
+			if masksize%2 != 0 {
+				masksize *= 2 // repeated
+			}
+			masksize = masksize * pointersPerByte / 8 // 4 bits per word
+			masksize++                                // unroll flag in the beginning
+			if masksize > maxGCMask && typ.gc[1] != 0 {
+				// If the mask is too large, unroll the program directly
+				// into the GC bitmap. It's 7 times slower than copying
+				// from the pre-unrolled mask, but saves 1/16 of type size
+				// memory for the mask.
+				mp.ptrarg[0] = x
+				mp.ptrarg[1] = unsafe.Pointer(typ)
+				mp.scalararg[0] = uint(size)
+				mp.scalararg[1] = uint(size0)
+				onM(&unrollgcproginplace_m)
+				goto marked
+			}
+			ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
+			// Check whether the program is already unrolled.
+			if uintptr(goatomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
+				mp.ptrarg[0] = unsafe.Pointer(typ)
+				onM(&unrollgcprog_m)
+			}
+			ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
+		} else {
+			ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
+		}
+		if size == 2*ptrSize {
+			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+			*xbitsb = *ptrmask | bitAllocated
+			goto marked
+		}
+		te = uintptr(typ.size) / ptrSize
+		// If the type occupies odd number of words, its mask is repeated.
+		if te%2 == 0 {
+			te /= 2
+		}
+	}
+	if size == 2*ptrSize {
+		xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+		*xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitAllocated
+		goto marked
+	}
+	// Copy pointer bitmask into the bitmap.
+	for i := uintptr(0); i < size0; i += 2 * ptrSize {
+		v := uint8((bitsPointer << 2) | (bitsPointer << 6))
+		if ptrmask != nil {
+			v = *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
+			ti++
+			if ti == te {
+				ti = 0
+			}
+		}
+		if i == 0 {
+			v |= bitAllocated
+		}
+		if i+ptrSize == size0 {
+			v &= ^uint8(bitPtrMask << 4)
+		}
+
+		off := (uintptr(x) + i - arena_start) / ptrSize
+		xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
+		shift := (off % wordsPerBitmapWord) * gcBits
+		xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+		*xbitsb = v
+	}
+	if size0%(2*ptrSize) == 0 && size0 < size {
+		// Mark the word after last object's word as bitsDead.
+		off := (uintptr(x) + size0 - arena_start) / ptrSize
+		xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
+		shift := (off % wordsPerBitmapWord) * gcBits
+		xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+		*xbitsb = bitsDead << 2
+	}
+marked:
 	mp.mallocing = 0
 
 	if raceenabled {
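To make the bitmap writes in the hunk above easier to follow, here is a standalone sketch of how one bitmap byte describing two heap words is composed: bits 0-1 hold the state of the first word, bits 2-3 its pointer info, and bits 4-7 the corresponding fields for the second word. The constant values are copied from the diff; everything else is illustrative only, not runtime code:

package main

import "fmt"

// Constants copied from the diff above.
const (
	bitsDead    = 0
	bitsPointer = 2

	bitAllocated = 2

	bitPtrMask = 3 << 2 // bitsMask << 2
)

func main() {
	// A two-word object whose words may both hold pointers:
	// the default value written by the copy loop in gomallocgc.
	v := uint8((bitsPointer << 2) | (bitsPointer << 6) | bitAllocated)
	fmt.Printf("both words scanned: %08b\n", v)

	// A noscan object: bitsDead in the first quadruple tells the GC
	// to stop scanning this object.
	noscan := uint8(bitAllocated + (bitsDead << 2))
	fmt.Printf("noscan object:      %08b\n", noscan)

	// The object ends after the first of the two words: clear the second
	// word's pointer bits, as the loop does with v &= ^(bitPtrMask << 4).
	v &= ^uint8(bitPtrMask << 4)
	fmt.Printf("last word trimmed:  %08b\n", v)
}

For objects larger than two words, the loop in the diff writes one such byte per pair of words, cycling through the type's pointer mask (ptrmask) and marking only the first byte with bitAllocated.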