runtime: combine sizespecializedmalloc small stubs into a single stub

This is purely refactoring that doesn't affect the generated code, other
than adding some spurious blank lines that are more annoying to remove
than to just deal with.

For #79286

Change-Id: I363e44deb245139b531b9f69f52c0a206a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/777860
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Matloob <matloob@google.com>
TryBot-Bypass: Michael Matloob <matloob@google.com>
This commit is contained in:
Michael Matloob 2026-05-14 08:04:37 -04:00 committed by Michael Matloob
parent 0a151acad8
commit 722ee60825
3 changed files with 113 additions and 118 deletions

View file

@ -166,7 +166,8 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
templateFunc: "mallocStub",
name: name,
ops: []op{
{inlineFunc, "inlinedMalloc", "smallScanNoHeaderStub"},
{inlineFunc, "inlinedMalloc", "smallStub"},
{foldCondition, "isNoScan_", str(false)},
{inlineFunc, "heapSetTypeNoHeaderStub", "heapSetTypeNoHeaderStub"},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{inlineFunc, "writeHeapBitsSmallStub", "writeHeapBitsSmallStub"},
@ -211,7 +212,8 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
templateFunc: "mallocStub",
name: name,
ops: []op{
{inlineFunc, "inlinedMalloc", "smallNoScanStub"},
{inlineFunc, "inlinedMalloc", "smallStub"},
{foldCondition, "isNoScan_", str(true)},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{subBasicLit, "elemsize_", str(elemsize)},
{subBasicLit, "sizeclass_", str(sc)},

View file

@ -35,7 +35,9 @@ func mallocgcSmallScanNoHeaderSC1(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -63,6 +65,7 @@ func mallocgcSmallScanNoHeaderSC1(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -196,7 +199,9 @@ func mallocgcSmallScanNoHeaderSC2(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -224,6 +229,7 @@ func mallocgcSmallScanNoHeaderSC2(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -357,7 +363,9 @@ func mallocgcSmallScanNoHeaderSC3(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -385,6 +393,7 @@ func mallocgcSmallScanNoHeaderSC3(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -518,7 +527,9 @@ func mallocgcSmallScanNoHeaderSC4(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -546,6 +557,7 @@ func mallocgcSmallScanNoHeaderSC4(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -679,7 +691,9 @@ func mallocgcSmallScanNoHeaderSC5(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -707,6 +721,7 @@ func mallocgcSmallScanNoHeaderSC5(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -840,7 +855,9 @@ func mallocgcSmallScanNoHeaderSC6(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -868,6 +885,7 @@ func mallocgcSmallScanNoHeaderSC6(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -1001,7 +1019,9 @@ func mallocgcSmallScanNoHeaderSC7(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -1029,6 +1049,7 @@ func mallocgcSmallScanNoHeaderSC7(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -1162,7 +1183,9 @@ func mallocgcSmallScanNoHeaderSC8(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -1190,6 +1213,7 @@ func mallocgcSmallScanNoHeaderSC8(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -1323,7 +1347,9 @@ func mallocgcSmallScanNoHeaderSC9(size uintptr, typ *_type, needzero bool) unsaf
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -1351,6 +1377,7 @@ func mallocgcSmallScanNoHeaderSC9(size uintptr, typ *_type, needzero bool) unsaf
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -1484,7 +1511,9 @@ func mallocgcSmallScanNoHeaderSC10(size uintptr, typ *_type, needzero bool) unsa
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
mp.mallocing = 1
@ -1512,6 +1541,7 @@ func mallocgcSmallScanNoHeaderSC10(size uintptr, typ *_type, needzero bool) unsa
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -1790,7 +1820,9 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -1846,6 +1878,7 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -1915,7 +1948,9 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -1971,6 +2006,7 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2040,7 +2076,9 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2096,6 +2134,7 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2165,7 +2204,9 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2221,6 +2262,7 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2290,7 +2332,9 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2346,6 +2390,7 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2415,7 +2460,9 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2471,6 +2518,7 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2540,7 +2588,9 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2596,6 +2646,7 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2665,7 +2716,9 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2721,6 +2774,7 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
@ -2790,7 +2844,9 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
@ -2846,6 +2902,7 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}

View file

@ -10,11 +10,11 @@
// To generate the specialized mallocgc functions, do 'go run .' inside runtime/_mkmalloc.
//
// To assemble a mallocgc function, the mallocStub function is cloned, and the call to
// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
// smallNoScanStub or tinyStub, depending on the parameters being specialized.
// inlinedMalloc is replaced with the inlined body of smallStub or tinyStub,
// depending on the parameters being specialized.
//
// The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases)
// identifiers are replaced with the value of the parameter in the specialized case.
// The size_ (for the tiny case) and elemsize_, sizeclass_, noscanint_, and isNoScan_ (for all
// three cases) identifiers are replaced with the value of the parameter in the specialized case.
// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub
// functions are also inlined by _mkmalloc.
@ -36,6 +36,7 @@ import (
const elemsize_ = 8
const sizeclass_ = 0
const noscanint_ = 0
const isNoScan_ = false
const size_ = 0
const isTiny_ = false
@ -139,9 +140,9 @@ func deductAssistCredit(size uintptr) {
// inlinedMalloc will never be called. It is defined just so that the compiler can compile
// the mallocStub function, which will also never be called, but instead used as a template
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub
// when generating the size-specialized malloc function. See the comment at the top of this
// file for more information.
// will be replaced with the inlined body of smallStub or tinyStub when generating the
// size-specialized malloc function. See the comment at the top of this file for more
// information.
func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
return unsafe.Pointer(uintptr(0)), 0
}
@ -161,14 +162,19 @@ func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) {
}
}
func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
func smallStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
const sizeclass = sizeclass_
const elemsize = elemsize_
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallScanNoHeader(size, typ, mp)
if isNoScan_ {
doubleCheckSmallNoScan(typ, mp)
}
if !isNoScan_ {
doubleCheckSmallScanNoHeader(size, typ, mp)
}
}
mp.mallocing = 1
@ -176,23 +182,49 @@ func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Poin
c := getMCache(mp)
const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
span := c.alloc[spc]
if isNoScan_ {
// First, check for a reusable object.
if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
// We have a reusable object, use it.
v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
mp.mallocing = 0
releasem(mp)
// TODO(thepudds): note that the generated return path is essentially duplicated
// by the generator. For example, see the two postMallocgcDebug calls and
// related duplicated code on the return path currently in the generated
// mallocgcSmallNoScanSC2 function. One set of those correspond to this
// return here. We might be able to de-duplicate the generated return path
// by updating the generator, perhaps by jumping to a shared return or similar.
return v, elemsize
}
}
v := nextFreeFastStub(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
if isNoScan_ {
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
}
if goarch.PtrSize == 8 && sizeclass == 1 {
// initHeapBits already set the pointer bits for the 8-byte sizeclass
// on 64-bit platforms.
c.scanAlloc += 8
} else {
dataSize := size // make the inliner happy
x := uintptr(x)
scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span)
c.scanAlloc += scanSize
if !isNoScan_ {
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
if goarch.PtrSize == 8 && sizeclass == 1 {
// initHeapBits already set the pointer bits for the 8-byte sizeclass
// on 64-bit platforms.
c.scanAlloc += 8
} else {
dataSize := size // make the inliner happy
x := uintptr(x)
scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span)
c.scanAlloc += scanSize
}
}
// Ensure that the stores above that initialize x to
@ -258,102 +290,6 @@ func doubleCheckSmallNoScan(typ *_type, mp *m) {
}
}
func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// TODO(matloob): Add functionality to mkmalloc to allow us to inline a non-constant
// sizeclass_ and elemsize_ value (instead just set to the expressions to look up the size class
// and elemsize. We'd also need to teach mkmalloc that values that are touched by these (specifically
// spc below) should turn into vars. This would allow us to generate mallocgcSmallNoScan itself,
// so that its code could not diverge from the generated functions.
const sizeclass = sizeclass_
const elemsize = elemsize_
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
doubleCheckSmallNoScan(typ, mp)
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
span := c.alloc[spc]
// First, check for a reusable object.
if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
// We have a reusable object, use it.
v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
mp.mallocing = 0
releasem(mp)
// TODO(thepudds): note that the generated return path is essentially duplicated
// by the generator. For example, see the two postMallocgcDebug calls and
// related duplicated code on the return path currently in the generated
// mallocgcSmallNoScanSC2 function. One set of those correspond to this
// return here. We might be able to de-duplicate the generated return path
// by updating the generator, perhaps by jumping to a shared return or similar.
return v, elemsize
}
v := nextFreeFastStub(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
if writeBarrier.enabled {
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
gcmarknewobject(span, uintptr(x))
} else {
// Track the last free index before the mark phase. This field
// is only used by the garbage collector. During the mark phase
// this is used by the conservative scanner to filter out objects
// that are both free and recently-allocated. It's safe to do that
// because we allocate-black if the GC is enabled. The conservative
// scanner produces pointers out of thin air, so without additional
// synchronization it might otherwise observe a partially-initialized
// object, which could crash the program.
span.freeIndexForScan = span.freeindex
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(elemsize)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, elemsize)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, elemsize
}
func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
if mp.mallocing != 0 {
throw("malloc deadlock")