runtime: consolidate tiny sizespecializedmalloc functions

In the sizespecializedmalloc goexperiment, we specialized the tiny
function per tiny size, so there was a different allocation function per
size from 1-15. This created a lot of functions for a code path that was
not executed that often. In microbenchmarks comparing the
consolidated tiny function in this CL with the per-size functions, the
specialized functions could be up to 20% faster, but for 8-byte
allocations, which are almost certainly the most common, the per-size
function was slower.

Look at the change description of CL 766980 for the results of those
microbenchmarks. The CL also contains the code used to run the
benchmark.

Since we've noticed significant icache pressure from all the functions,
the tiny functions aren't used as much as the other ones, and the
benefits seem to be mixed, consolidate the 15 functions into a single
function.

This cuts the size of the mallocgc* functions by about 20%.

For #79286

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64_c2s16-perf_vs_parent-sizespecializedmalloc,gotip-linux-amd64_c3h88-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4ah72-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4as16-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4as16-perf_vs_parent,gotip-linux-arm64_c4ah72-perf_vs_parent,gotip-linux-amd64_c3h88-perf_vs_parent,gotip-linux-amd64_c2s16-perf_vs_parent
Change-Id: I824f65727a858158c14d2edd6fea1e846a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/772540
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Matloob <matloob@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Michael Matloob 2026-04-15 20:08:38 -04:00 committed by Michael Matloob
parent 326e7845a2
commit 9936a78b78
11 changed files with 101 additions and 2277 deletions

View file

@ -38,9 +38,9 @@ type symsStruct struct {
MoveSliceNoCapNoScan *obj.LSym
InterfaceSwitch *obj.LSym
MallocGC *obj.LSym
MallocGCTiny *obj.LSym
MallocGCSmallNoScan [27]*obj.LSym
MallocGCSmallScanNoHeader [27]*obj.LSym
MallocGCTiny [16]*obj.LSym
Memmove *obj.LSym
Memequal *obj.LSym
Msanread *obj.LSym

View file

@ -480,7 +480,7 @@ func isSpecializedMalloc(aux Aux) bool {
name := fn.String()
return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
strings.HasPrefix(name, "runtime.mallocgcTinySize")
strings.HasPrefix(name, "runtime.mallocgcTinySC")
}
// canLoadUnaligned reports if the architecture supports unaligned load operations.

View file

@ -139,9 +139,7 @@ func InitConfig() {
for i := 1; i < len(ir.Syms.MallocGCSmallScanNoHeader); i++ {
ir.Syms.MallocGCSmallScanNoHeader[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", i))
}
for i := 1; i < len(ir.Syms.MallocGCTiny); i++ {
ir.Syms.MallocGCTiny[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcTinySize%d", i))
}
ir.Syms.MallocGCTiny = typecheck.LookupRuntimeFunc("mallocgcTinySC2")
ir.Syms.MallocGC = typecheck.LookupRuntimeFunc("mallocgc")
ir.Syms.Memmove = typecheck.LookupRuntimeFunc("memmove")
ir.Syms.Memequal = typecheck.LookupRuntimeFunc("memequal")
@ -821,7 +819,7 @@ func (s *state) specializedMallocSym(size int64, hasPointers bool) *obj.LSym {
return ir.Syms.MallocGCSmallScanNoHeader[sizeClass]
}
if size < gc.TinySize {
return ir.Syms.MallocGCTiny[size]
return ir.Syms.MallocGCTiny
}
return ir.Syms.MallocGCSmallNoScan[sizeClass]
}

View file

@ -126,12 +126,7 @@ func smallScanNoHeaderSCFuncName(sc, scMax uint8) string {
return fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", sc)
}
func tinyFuncName(size uintptr) string {
if size == 0 || size > smallScanNoHeaderMax {
return "mallocPanic"
}
return fmt.Sprintf("mallocgcTinySize%d", size)
}
const tinyFuncName = "mallocgcTinySC2"
func smallNoScanSCFuncName(sc, scMax uint8) string {
if sc < 2 || sc > scMax {
@ -184,11 +179,8 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
// tiny
tinySizeClass := sizeToSizeClass[tinySize]
for s := range uintptr(16) {
if s == 0 {
continue
}
name := tinyFuncName(s)
{
name := tinyFuncName
elemsize := classes[tinySizeClass].size
config.specs = append(config.specs, spec{
templateFunc: "mallocStub",
@ -196,9 +188,9 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
ops: []op{
{inlineFunc, "inlinedMalloc", "tinyStub"},
{inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"},
{inlineFunc, "deductAssistCredit", "deductAssistCredit"},
{subBasicLit, "elemsize_", str(elemsize)},
{subBasicLit, "sizeclass_", str(tinySizeClass)},
{subBasicLit, "size_", str(s)},
{subBasicLit, "noscanint_", str(noscan)},
{foldCondition, "isTiny_", str(true)},
},
@ -356,7 +348,8 @@ func foldIfCondition(node ast.Node, from, to string) ast.Node {
}
// inlineFunction recursively replaces calls to the function 'from' with the body of the function
// 'toDecl'. All calls to 'from' must appear in assignment statements.
// 'toDecl'. All calls to 'from' must either have no return values and appear in standalone expression statements
// or otherwise must appear in assignment statements.
// The replacement is very simple: it doesn't substitute the arguments for the parameters, so the
// arguments to the function call must be the same identifier as the parameters to the function
// declared by 'toDecl'. If there are any calls to 'from' where that's not the case, there will be a fatal error.
@ -374,13 +367,17 @@ func inlineFunction(node ast.Node, from string, toDecl *ast.FuncDecl) ast.Node {
replaceAssignment(cursor, node, toDecl)
}
return false
case *ast.CallExpr:
// double check that all calls to from appear within an assignment
if isCallTo(node, from) {
if _, ok := cursor.Parent().(*ast.AssignStmt); !ok {
log.Fatalf("applying op: all calls to function %q being replaced must appear in an assignment statement, appears in %T", from, cursor.Parent())
case *ast.ExprStmt:
if callExpr, ok := node.X.(*ast.CallExpr); ok && isCallTo(callExpr, from) {
if !argsMatchParameters(callExpr.Args, toDecl.Type.Params) {
log.Fatalf("applying op: arguments to %v don't match parameter names of %v: %v", from, toDecl.Name, debugPrint(callExpr.Args...))
}
if toDecl.Type.Results != nil {
log.Fatalf("applying op: call to %v, which does not appear in an assignment, is replaced with %v which has return values: %v", from, toDecl.Name, debugPrint(callExpr.Args...))
}
replaceCallExprStmt(cursor, toDecl)
}
return false
}
return true
}, nil)
@ -425,6 +422,16 @@ func isCallTo(expr ast.Expr, name string) bool {
return isIdentWithName(callexpr.Fun, name)
}
// replaceCallExprStmt expands a statement-level call to a result-less function in place.
// A clone of every statement in funcdecl's body is inserted, in order, ahead of the
// statement the cursor points at, and then that call statement itself is deleted.
// The statements are spliced in directly; they are not wrapped in a new block.
func replaceCallExprStmt(cursor *astutil.Cursor, funcdecl *ast.FuncDecl) {
	inlined := internalastutil.CloneNode(funcdecl.Body).List
	for i := range inlined {
		cursor.InsertBefore(inlined[i])
	}
	cursor.Delete()
}
// replaceAssignment replaces an assignment statement where the right hand side is a function call
// whose arguments have the same names as the parameters to funcdecl with the body of funcdecl.
// It sets the left hand side of the assignment to the return values of the function.
@ -641,7 +648,7 @@ var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.
var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
for i := range uintptr(smallScanNoHeaderMax + 1) {
if i < 16 {
fmt.Fprintf(&b, "%s,\n", tinyFuncName(i))
fmt.Fprintf(&b, "%s,\n", "mallocPanic")
} else {
fmt.Fprintf(&b, "%s,\n", smallNoScanSCFuncName(sizeToSizeClass[i], scMax))
}

View file

@ -1078,7 +1078,10 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if sizeSpecializedMallocEnabled && heapBitsInSpan(size) {
if typ == nil || !typ.Pointers() {
return mallocNoScanTable[size](size, typ, needzero)
if size >= maxTinySize {
return mallocNoScanTable[size](size, typ, needzero)
}
return mallocgcTinySC2(size, typ, needzero)
} else {
if !needzero {
throw("objects with pointers must be zeroed")
@ -1840,28 +1843,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) {
}
}
// deductAssistCredit reduces the current G's assist credit
// by size bytes, and assists the GC if necessary.
//
// Caller must be preemptible.
func deductAssistCredit(size uintptr) {
// Charge the current user G for this allocation.
assistG := getg()
if assistG.m.curg != nil {
assistG = assistG.m.curg
}
// Charge the allocation against the G. We'll account
// for internal fragmentation at the end of mallocgc.
assistG.gcAssistBytes -= int64(size)
if assistG.gcAssistBytes < 0 {
// This G is in debt. Assist the GC to correct
// this before allocating. This must happen
// before disabling preemption.
gcAssistAlloc(assistG)
}
}
// addAssistCredit is like deductAssistCredit,
// but adds credit rather than removes,
// and never calls gcAssistAlloc.

File diff suppressed because it is too large Load diff

View file

@ -126,6 +126,23 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
return x
}
// deductAssistCredit reduces the current G's GC assist credit
// by size bytes, and assists the GC if necessary.
//
// The charge is applied to getg().m.curg when that is non-nil, and
// to the G returned by getg otherwise. If subtracting size leaves
// gcAssistBytes negative, the G is in debt and gcAssistAlloc is
// called for it before returning.
//
// Caller must be preemptible.
//
// Defined here so it can be inlined by mkmalloc.
// NOTE(review): the body currently contains no return statements;
// confirm that mkmalloc's inlining tolerates one before adding any.
func deductAssistCredit(size uintptr) {
assistG := getg()
if assistG.m.curg != nil {
assistG = assistG.m.curg
}
assistG.gcAssistBytes -= int64(size)
if assistG.gcAssistBytes < 0 {
gcAssistAlloc(assistG)
}
}
// inlinedMalloc will never be called. It is defined just so that the compiler can compile
// the mallocStub function, which will also never be called, but instead used as a template
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
@ -357,13 +374,12 @@ func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
}
func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
const constsize = size_
const elemsize = elemsize_
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
doubleCheckTiny(constsize, typ, mp)
doubleCheckTiny(size, typ, mp)
}
mp.mallocing = 1
@ -399,9 +415,9 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
c := getMCache(mp)
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if constsize&7 == 0 {
if size&7 == 0 {
off = alignUp(off, 8)
} else if goarch.PtrSize == 4 && constsize == 12 {
} else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
@ -409,15 +425,15 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
// TODO(mknyszek): Remove this workaround if/when issue 36606
// is resolved.
off = alignUp(off, 8)
} else if constsize&3 == 0 {
} else if size&3 == 0 {
off = alignUp(off, 4)
} else if constsize&1 == 0 {
} else if size&1 == 0 {
off = alignUp(off, 2)
}
if off+constsize <= maxTinySize && c.tiny != 0 {
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
x := unsafe.Pointer(c.tiny + off)
c.tinyoffset = off + constsize
c.tinyoffset = off + size
c.tinyAllocs++
mp.mallocing = 0
releasem(mp)
@ -435,10 +451,10 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
// Note: disabled when race detector is on, see comment near end of this function.
c.tiny = uintptr(x)
c.tinyoffset = constsize
c.tinyoffset = size
}
// Ensure that the stores above that initialize x to
@ -502,7 +518,7 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
// TODO: enable this padding for all allocations, not just
// tinyalloc ones. It's tricky because of pointer maps.
// Maybe just all noscan objects?
x = add(x, elemsize-constsize)
x = add(x, elemsize-size)
}
return x, elemsize
}

View file

@ -523,21 +523,21 @@ var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.
var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{
mallocPanic,
mallocgcTinySize1,
mallocgcTinySize2,
mallocgcTinySize3,
mallocgcTinySize4,
mallocgcTinySize5,
mallocgcTinySize6,
mallocgcTinySize7,
mallocgcTinySize8,
mallocgcTinySize9,
mallocgcTinySize10,
mallocgcTinySize11,
mallocgcTinySize12,
mallocgcTinySize13,
mallocgcTinySize14,
mallocgcTinySize15,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocPanic,
mallocgcSmallNoScanSC2,
mallocgcSmallNoScanSC3,
mallocgcSmallNoScanSC3,

View file

@ -23,7 +23,7 @@ func CountBytes(s []byte) int {
func ToByteSlice() []byte { // Issue #24698
// amd64:`LEAQ type:\[3\]uint8`
// amd64:`CALL runtime\.(newobject|mallocgcTinySize3)`
// amd64:`CALL runtime\.(newobject|mallocgcTinySC2)`
// amd64:-`.*runtime.stringtoslicebyte`
return []byte("foo")
}

View file

@ -665,14 +665,14 @@ func f39a() (x []int) {
func f39b() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
printnl() // ERROR "live at call to printnl: x$"
return x
}
func f39c() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
printnl() // ERROR "live at call to printnl: x$"
return
}

View file

@ -663,14 +663,14 @@ func f39a() (x []int) {
func f39b() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
printnl() // ERROR "live at call to printnl: x$"
return x
}
func f39c() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
printnl() // ERROR "live at call to printnl: x$"
return
}