mirror of
https://github.com/golang/go.git
synced 2026-06-27 03:11:23 +00:00
runtime: consolidate tiny sizespecializedmalloc functions
In the sizespecializedmalloc goexperiment, we specialized the tiny function per tiny size, so there was a different allocation function per size from 1-15. This created a lot of functions for a code path that was not executed that often. From the microbenchmarks, comparing the consolidated tiny function in this CL with the per-size functions, the specialized functions could be up to 20% faster, but for 8-byte allocations, which are almost certainly the most common, the per-size function was slower. Look at the change description of CL 766980 for the results of those microbenchmarks. The CL also contains the code used to run the benchmark. Since we've noticed significant icache pressure from all the functions, the tiny functions aren't used as much as the other ones, and the benefits seem to be mixed, consolidate the 15 functions into a single function. This cuts the size of the mallocgc* functions by about 20%. For #79286 Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64_c2s16-perf_vs_parent-sizespecializedmalloc,gotip-linux-amd64_c3h88-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4ah72-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4as16-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4as16-perf_vs_parent,gotip-linux-arm64_c4ah72-perf_vs_parent,gotip-linux-amd64_c3h88-perf_vs_parent,gotip-linux-amd64_c2s16-perf_vs_parent Change-Id: I824f65727a858158c14d2edd6fea1e846a6a6964 Reviewed-on: https://go-review.googlesource.com/c/go/+/772540 LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Matloob <matloob@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
parent
326e7845a2
commit
9936a78b78
11 changed files with 101 additions and 2277 deletions
|
|
@ -38,9 +38,9 @@ type symsStruct struct {
|
|||
MoveSliceNoCapNoScan *obj.LSym
|
||||
InterfaceSwitch *obj.LSym
|
||||
MallocGC *obj.LSym
|
||||
MallocGCTiny *obj.LSym
|
||||
MallocGCSmallNoScan [27]*obj.LSym
|
||||
MallocGCSmallScanNoHeader [27]*obj.LSym
|
||||
MallocGCTiny [16]*obj.LSym
|
||||
Memmove *obj.LSym
|
||||
Memequal *obj.LSym
|
||||
Msanread *obj.LSym
|
||||
|
|
|
|||
|
|
@ -480,7 +480,7 @@ func isSpecializedMalloc(aux Aux) bool {
|
|||
name := fn.String()
|
||||
return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
|
||||
strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
|
||||
strings.HasPrefix(name, "runtime.mallocgcTinySize")
|
||||
strings.HasPrefix(name, "runtime.mallocgcTinySC")
|
||||
}
|
||||
|
||||
// canLoadUnaligned reports if the architecture supports unaligned load operations.
|
||||
|
|
|
|||
|
|
@ -139,9 +139,7 @@ func InitConfig() {
|
|||
for i := 1; i < len(ir.Syms.MallocGCSmallScanNoHeader); i++ {
|
||||
ir.Syms.MallocGCSmallScanNoHeader[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", i))
|
||||
}
|
||||
for i := 1; i < len(ir.Syms.MallocGCTiny); i++ {
|
||||
ir.Syms.MallocGCTiny[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcTinySize%d", i))
|
||||
}
|
||||
ir.Syms.MallocGCTiny = typecheck.LookupRuntimeFunc("mallocgcTinySC2")
|
||||
ir.Syms.MallocGC = typecheck.LookupRuntimeFunc("mallocgc")
|
||||
ir.Syms.Memmove = typecheck.LookupRuntimeFunc("memmove")
|
||||
ir.Syms.Memequal = typecheck.LookupRuntimeFunc("memequal")
|
||||
|
|
@ -821,7 +819,7 @@ func (s *state) specializedMallocSym(size int64, hasPointers bool) *obj.LSym {
|
|||
return ir.Syms.MallocGCSmallScanNoHeader[sizeClass]
|
||||
}
|
||||
if size < gc.TinySize {
|
||||
return ir.Syms.MallocGCTiny[size]
|
||||
return ir.Syms.MallocGCTiny
|
||||
}
|
||||
return ir.Syms.MallocGCSmallNoScan[sizeClass]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,12 +126,7 @@ func smallScanNoHeaderSCFuncName(sc, scMax uint8) string {
|
|||
return fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", sc)
|
||||
}
|
||||
|
||||
func tinyFuncName(size uintptr) string {
|
||||
if size == 0 || size > smallScanNoHeaderMax {
|
||||
return "mallocPanic"
|
||||
}
|
||||
return fmt.Sprintf("mallocgcTinySize%d", size)
|
||||
}
|
||||
const tinyFuncName = "mallocgcTinySC2"
|
||||
|
||||
func smallNoScanSCFuncName(sc, scMax uint8) string {
|
||||
if sc < 2 || sc > scMax {
|
||||
|
|
@ -184,11 +179,8 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
|
|||
|
||||
// tiny
|
||||
tinySizeClass := sizeToSizeClass[tinySize]
|
||||
for s := range uintptr(16) {
|
||||
if s == 0 {
|
||||
continue
|
||||
}
|
||||
name := tinyFuncName(s)
|
||||
{
|
||||
name := tinyFuncName
|
||||
elemsize := classes[tinySizeClass].size
|
||||
config.specs = append(config.specs, spec{
|
||||
templateFunc: "mallocStub",
|
||||
|
|
@ -196,9 +188,9 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
|
|||
ops: []op{
|
||||
{inlineFunc, "inlinedMalloc", "tinyStub"},
|
||||
{inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"},
|
||||
{inlineFunc, "deductAssistCredit", "deductAssistCredit"},
|
||||
{subBasicLit, "elemsize_", str(elemsize)},
|
||||
{subBasicLit, "sizeclass_", str(tinySizeClass)},
|
||||
{subBasicLit, "size_", str(s)},
|
||||
{subBasicLit, "noscanint_", str(noscan)},
|
||||
{foldCondition, "isTiny_", str(true)},
|
||||
},
|
||||
|
|
@ -356,7 +348,8 @@ func foldIfCondition(node ast.Node, from, to string) ast.Node {
|
|||
}
|
||||
|
||||
// inlineFunction recursively replaces calls to the function 'from' with the body of the function
|
||||
// 'toDecl'. All calls to 'from' must appear in assignment statements.
|
||||
// 'toDecl'. All calls to 'from' must either have no return values and appear in standalone expression statements
|
||||
// or otherwise must appear in assignment statements.
|
||||
// The replacement is very simple: it doesn't substitute the arguments for the parameters, so the
|
||||
// arguments to the function call must be the same identifier as the parameters to the function
|
||||
// declared by 'toDecl'. If there are any calls to from where that's not the case there will be a fatal error.
|
||||
|
|
@ -374,13 +367,17 @@ func inlineFunction(node ast.Node, from string, toDecl *ast.FuncDecl) ast.Node {
|
|||
replaceAssignment(cursor, node, toDecl)
|
||||
}
|
||||
return false
|
||||
case *ast.CallExpr:
|
||||
// double check that all calls to from appear within an assignment
|
||||
if isCallTo(node, from) {
|
||||
if _, ok := cursor.Parent().(*ast.AssignStmt); !ok {
|
||||
log.Fatalf("applying op: all calls to function %q being replaced must appear in an assignment statement, appears in %T", from, cursor.Parent())
|
||||
case *ast.ExprStmt:
|
||||
if callExpr, ok := node.X.(*ast.CallExpr); ok && isCallTo(callExpr, from) {
|
||||
if !argsMatchParameters(callExpr.Args, toDecl.Type.Params) {
|
||||
log.Fatalf("applying op: arguments to %v don't match parameter names of %v: %v", from, toDecl.Name, debugPrint(callExpr.Args...))
|
||||
}
|
||||
if toDecl.Type.Results != nil {
|
||||
log.Fatalf("applying op: call to %v, which does not appear in an assignment, is replaced with %v which has return values: %v", from, toDecl.Name, debugPrint(callExpr.Args...))
|
||||
}
|
||||
replaceCallExprStmt(cursor, toDecl)
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}, nil)
|
||||
|
|
@ -425,6 +422,16 @@ func isCallTo(expr ast.Expr, name string) bool {
|
|||
return isIdentWithName(callexpr.Fun, name)
|
||||
}
|
||||
|
||||
// replaceCallExprStmt replaces a standalone expression statement calling a function with no
|
||||
// return values with the body of the function.
|
||||
func replaceCallExprStmt(cursor *astutil.Cursor, funcdecl *ast.FuncDecl) {
|
||||
body := internalastutil.CloneNode(funcdecl.Body)
|
||||
for _, stmt := range body.List {
|
||||
cursor.InsertBefore(stmt)
|
||||
}
|
||||
cursor.Delete()
|
||||
}
|
||||
|
||||
// replaceAssignment replaces an assignment statement where the right hand side is a function call
|
||||
// whose arguments have the same names as the parameters to funcdecl with the body of funcdecl.
|
||||
// It sets the left hand side of the assignment to the return values of the function.
|
||||
|
|
@ -641,7 +648,7 @@ var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.
|
|||
var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
|
||||
for i := range uintptr(smallScanNoHeaderMax + 1) {
|
||||
if i < 16 {
|
||||
fmt.Fprintf(&b, "%s,\n", tinyFuncName(i))
|
||||
fmt.Fprintf(&b, "%s,\n", "mallocPanic")
|
||||
} else {
|
||||
fmt.Fprintf(&b, "%s,\n", smallNoScanSCFuncName(sizeToSizeClass[i], scMax))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1078,7 +1078,10 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
|
|||
|
||||
if sizeSpecializedMallocEnabled && heapBitsInSpan(size) {
|
||||
if typ == nil || !typ.Pointers() {
|
||||
return mallocNoScanTable[size](size, typ, needzero)
|
||||
if size >= maxTinySize {
|
||||
return mallocNoScanTable[size](size, typ, needzero)
|
||||
}
|
||||
return mallocgcTinySC2(size, typ, needzero)
|
||||
} else {
|
||||
if !needzero {
|
||||
throw("objects with pointers must be zeroed")
|
||||
|
|
@ -1840,28 +1843,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) {
|
|||
}
|
||||
}
|
||||
|
||||
// deductAssistCredit reduces the current G's assist credit
|
||||
// by size bytes, and assists the GC if necessary.
|
||||
//
|
||||
// Caller must be preemptible.
|
||||
func deductAssistCredit(size uintptr) {
|
||||
// Charge the current user G for this allocation.
|
||||
assistG := getg()
|
||||
if assistG.m.curg != nil {
|
||||
assistG = assistG.m.curg
|
||||
}
|
||||
// Charge the allocation against the G. We'll account
|
||||
// for internal fragmentation at the end of mallocgc.
|
||||
assistG.gcAssistBytes -= int64(size)
|
||||
|
||||
if assistG.gcAssistBytes < 0 {
|
||||
// This G is in debt. Assist the GC to correct
|
||||
// this before allocating. This must happen
|
||||
// before disabling preemption.
|
||||
gcAssistAlloc(assistG)
|
||||
}
|
||||
}
|
||||
|
||||
// addAssistCredit is like deductAssistCredit,
|
||||
// but adds credit rather than removes,
|
||||
// and never calls gcAssistAlloc.
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -126,6 +126,23 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
|
|||
return x
|
||||
}
|
||||
|
||||
// deductAssistCredit reduces the current G's GC assist credit
|
||||
// by size bytes, and assists the GC if necessary.
|
||||
//
|
||||
// Caller must be preemptible.
|
||||
//
|
||||
// Defined here so it can be inlined by mkmalloc.
|
||||
func deductAssistCredit(size uintptr) {
|
||||
assistG := getg()
|
||||
if assistG.m.curg != nil {
|
||||
assistG = assistG.m.curg
|
||||
}
|
||||
assistG.gcAssistBytes -= int64(size)
|
||||
if assistG.gcAssistBytes < 0 {
|
||||
gcAssistAlloc(assistG)
|
||||
}
|
||||
}
|
||||
|
||||
// inlinedMalloc will never be called. It is defined just so that the compiler can compile
|
||||
// the mallocStub function, which will also never be called, but instead used as a template
|
||||
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
|
||||
|
|
@ -357,13 +374,12 @@ func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
|
|||
}
|
||||
|
||||
func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
|
||||
const constsize = size_
|
||||
const elemsize = elemsize_
|
||||
|
||||
// Set mp.mallocing to keep from being preempted by GC.
|
||||
mp := acquirem()
|
||||
if doubleCheckMalloc {
|
||||
doubleCheckTiny(constsize, typ, mp)
|
||||
doubleCheckTiny(size, typ, mp)
|
||||
}
|
||||
mp.mallocing = 1
|
||||
|
||||
|
|
@ -399,9 +415,9 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
|
|||
c := getMCache(mp)
|
||||
off := c.tinyoffset
|
||||
// Align tiny pointer for required (conservative) alignment.
|
||||
if constsize&7 == 0 {
|
||||
if size&7 == 0 {
|
||||
off = alignUp(off, 8)
|
||||
} else if goarch.PtrSize == 4 && constsize == 12 {
|
||||
} else if goarch.PtrSize == 4 && size == 12 {
|
||||
// Conservatively align 12-byte objects to 8 bytes on 32-bit
|
||||
// systems so that objects whose first field is a 64-bit
|
||||
// value is aligned to 8 bytes and does not cause a fault on
|
||||
|
|
@ -409,15 +425,15 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
|
|||
// TODO(mknyszek): Remove this workaround if/when issue 36606
|
||||
// is resolved.
|
||||
off = alignUp(off, 8)
|
||||
} else if constsize&3 == 0 {
|
||||
} else if size&3 == 0 {
|
||||
off = alignUp(off, 4)
|
||||
} else if constsize&1 == 0 {
|
||||
} else if size&1 == 0 {
|
||||
off = alignUp(off, 2)
|
||||
}
|
||||
if off+constsize <= maxTinySize && c.tiny != 0 {
|
||||
if off+size <= maxTinySize && c.tiny != 0 {
|
||||
// The object fits into existing tiny block.
|
||||
x := unsafe.Pointer(c.tiny + off)
|
||||
c.tinyoffset = off + constsize
|
||||
c.tinyoffset = off + size
|
||||
c.tinyAllocs++
|
||||
mp.mallocing = 0
|
||||
releasem(mp)
|
||||
|
|
@ -435,10 +451,10 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
|
|||
(*[2]uint64)(x)[1] = 0
|
||||
// See if we need to replace the existing tiny block with the new one
|
||||
// based on amount of remaining free space.
|
||||
if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
|
||||
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
|
||||
// Note: disabled when race detector is on, see comment near end of this function.
|
||||
c.tiny = uintptr(x)
|
||||
c.tinyoffset = constsize
|
||||
c.tinyoffset = size
|
||||
}
|
||||
|
||||
// Ensure that the stores above that initialize x to
|
||||
|
|
@ -502,7 +518,7 @@ func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr)
|
|||
// TODO: enable this padding for all allocations, not just
|
||||
// tinyalloc ones. It's tricky because of pointer maps.
|
||||
// Maybe just all noscan objects?
|
||||
x = add(x, elemsize-constsize)
|
||||
x = add(x, elemsize-size)
|
||||
}
|
||||
return x, elemsize
|
||||
}
|
||||
|
|
|
|||
|
|
@ -523,21 +523,21 @@ var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.
|
|||
|
||||
var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{
|
||||
mallocPanic,
|
||||
mallocgcTinySize1,
|
||||
mallocgcTinySize2,
|
||||
mallocgcTinySize3,
|
||||
mallocgcTinySize4,
|
||||
mallocgcTinySize5,
|
||||
mallocgcTinySize6,
|
||||
mallocgcTinySize7,
|
||||
mallocgcTinySize8,
|
||||
mallocgcTinySize9,
|
||||
mallocgcTinySize10,
|
||||
mallocgcTinySize11,
|
||||
mallocgcTinySize12,
|
||||
mallocgcTinySize13,
|
||||
mallocgcTinySize14,
|
||||
mallocgcTinySize15,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocPanic,
|
||||
mallocgcSmallNoScanSC2,
|
||||
mallocgcSmallNoScanSC3,
|
||||
mallocgcSmallNoScanSC3,
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ func CountBytes(s []byte) int {
|
|||
|
||||
func ToByteSlice() []byte { // Issue #24698
|
||||
// amd64:`LEAQ type:\[3\]uint8`
|
||||
// amd64:`CALL runtime\.(newobject|mallocgcTinySize3)`
|
||||
// amd64:`CALL runtime\.(newobject|mallocgcTinySC2)`
|
||||
// amd64:-`.*runtime.stringtoslicebyte`
|
||||
return []byte("foo")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -665,14 +665,14 @@ func f39a() (x []int) {
|
|||
|
||||
func f39b() (x [10]*int) {
|
||||
x = [10]*int{}
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
|
||||
printnl() // ERROR "live at call to printnl: x$"
|
||||
return x
|
||||
}
|
||||
|
||||
func f39c() (x [10]*int) {
|
||||
x = [10]*int{}
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
|
||||
printnl() // ERROR "live at call to printnl: x$"
|
||||
return
|
||||
}
|
||||
|
|
|
|||
|
|
@ -663,14 +663,14 @@ func f39a() (x []int) {
|
|||
|
||||
func f39b() (x [10]*int) {
|
||||
x = [10]*int{}
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
|
||||
printnl() // ERROR "live at call to printnl: x$"
|
||||
return x
|
||||
}
|
||||
|
||||
func f39c() (x [10]*int) {
|
||||
x = [10]*int{}
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySize[48]): x$"
|
||||
x[0] = new(int) // ERROR "live at call to (newobject|mallocgcTinySC2): x$"
|
||||
printnl() // ERROR "live at call to printnl: x$"
|
||||
return
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue