cmd/compile: call generated size-specialized malloc functions directly

This change creates calls to size-specialized malloc functions instead
of calls to newObject when we know the size of the allocation at
compile time. Most of it is a matter of calling the newObject function
(which will create calls to the size-specialized functions) rather than
the newObjectNonSpecialized function (which won't). In newHeapaddr's
small, non-pointer case, we create a non-specialized newObject call and
transform it into the appropriate size-specialized function when we
produce the mallocgc call in flushPendingHeapAllocations.
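
To make the lowering concrete, here is a rough sketch (illustrative
only, not code from this CL; the specific helper name, the size class,
and the (size, type, needZero) signature are inferred from the call
sites added below) of what the compiler emits for a small, pointer-free
allocation when the SizeSpecializedMalloc experiment is enabled:

    package p

    type S struct{ a, b, c int64 } // 24 bytes, no pointers

    func f() *S {
        // Before: new(S) lowers to the generic allocator,
        //     runtime.newobject(typeOf(S))
        // which dispatches on size inside runtime.mallocgc.
        //
        // After: the compiler picks the helper for S's size class at
        // compile time and calls it directly, roughly
        //     runtime.mallocgcSmallNoScanSC3(24, typeOf(S), true)
        // (typeOf(S) stands for the *_type descriptor; true is needZero).
        return new(S)
    }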

We also have to update some of the rewrites in generic.rules so that
rules that currently apply to newObject apply to the size-specialized
functions as well.

The messiest part is adjusting the number of frames we skip when saving
the memory profiler stack, because the call to profilealloc is two
frames shallower in the size-specialized malloc functions than it is
when newObject calls mallocgc. A number of tests have been adjusted to
account for that.

Change-Id: I6a6a6964c9037fb6719e392c4a498ed700b617d7
Reviewed-on: https://go-review.googlesource.com/c/go/+/707856
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Matloob <matloob@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Michael Matloob 2025-09-29 17:26:49 -04:00
parent 80f3bb5516
commit 19a30ea3f2
16 changed files with 228 additions and 116 deletions


@@ -13,47 +13,50 @@ import (
var Syms symsStruct
type symsStruct struct {
AssertE2I *obj.LSym
AssertE2I2 *obj.LSym
Asanread *obj.LSym
Asanwrite *obj.LSym
CgoCheckMemmove *obj.LSym
CgoCheckPtrWrite *obj.LSym
CheckPtrAlignment *obj.LSym
Deferproc *obj.LSym
Deferprocat *obj.LSym
DeferprocStack *obj.LSym
Deferreturn *obj.LSym
Duffcopy *obj.LSym
Duffzero *obj.LSym
GCWriteBarrier [8]*obj.LSym
Goschedguarded *obj.LSym
Growslice *obj.LSym
InterfaceSwitch *obj.LSym
MallocGC *obj.LSym
Memmove *obj.LSym
Msanread *obj.LSym
Msanwrite *obj.LSym
Msanmove *obj.LSym
Newobject *obj.LSym
Newproc *obj.LSym
PanicBounds *obj.LSym
PanicExtend *obj.LSym
Panicdivide *obj.LSym
Panicshift *obj.LSym
PanicdottypeE *obj.LSym
PanicdottypeI *obj.LSym
Panicnildottype *obj.LSym
Panicoverflow *obj.LSym
Racefuncenter *obj.LSym
Racefuncexit *obj.LSym
Raceread *obj.LSym
Racereadrange *obj.LSym
Racewrite *obj.LSym
Racewriterange *obj.LSym
TypeAssert *obj.LSym
WBZero *obj.LSym
WBMove *obj.LSym
AssertE2I *obj.LSym
AssertE2I2 *obj.LSym
Asanread *obj.LSym
Asanwrite *obj.LSym
CgoCheckMemmove *obj.LSym
CgoCheckPtrWrite *obj.LSym
CheckPtrAlignment *obj.LSym
Deferproc *obj.LSym
Deferprocat *obj.LSym
DeferprocStack *obj.LSym
Deferreturn *obj.LSym
Duffcopy *obj.LSym
Duffzero *obj.LSym
GCWriteBarrier [8]*obj.LSym
Goschedguarded *obj.LSym
Growslice *obj.LSym
InterfaceSwitch *obj.LSym
MallocGC *obj.LSym
MallocGCSmallNoScan [27]*obj.LSym
MallocGCSmallScanNoHeader [27]*obj.LSym
MallocGCTiny [16]*obj.LSym
Memmove *obj.LSym
Msanread *obj.LSym
Msanwrite *obj.LSym
Msanmove *obj.LSym
Newobject *obj.LSym
Newproc *obj.LSym
PanicBounds *obj.LSym
PanicExtend *obj.LSym
Panicdivide *obj.LSym
Panicshift *obj.LSym
PanicdottypeE *obj.LSym
PanicdottypeI *obj.LSym
Panicnildottype *obj.LSym
Panicoverflow *obj.LSym
Racefuncenter *obj.LSym
Racefuncexit *obj.LSym
Raceread *obj.LSym
Racereadrange *obj.LSym
Racewrite *obj.LSym
Racewriterange *obj.LSym
TypeAssert *obj.LSym
WBZero *obj.LSym
WBMove *obj.LSym
// Wasm
SigPanic *obj.LSym
Staticuint64s *obj.LSym


@@ -2065,28 +2065,32 @@
// for rewriting results of some late-expanded rewrites (below)
(SelectN [n] m:(MakeResult ___)) => m.Args[n]
// TODO(matloob): Try out having non-zeroing mallocs for pointerless
// memory, and leaving the zeroing here. Then the compiler can remove
// the zeroing if the user has explicit writes to the whole object.
// for late-expanded calls, recognize newobject and remove zeroing and nilchecks
(Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call))
&& isSameCall(call.Aux, "runtime.newobject")
(Zero (SelectN [0] call:(StaticLECall ___)) mem:(SelectN [1] call))
&& isMalloc(call.Aux)
=> mem
(Store (SelectN [0] call:(StaticLECall _ _)) x mem:(SelectN [1] call))
(Store (SelectN [0] call:(StaticLECall ___)) x mem:(SelectN [1] call))
&& isConstZero(x)
&& isSameCall(call.Aux, "runtime.newobject")
&& isMalloc(call.Aux)
=> mem
(Store (OffPtr (SelectN [0] call:(StaticLECall _ _))) x mem:(SelectN [1] call))
(Store (OffPtr (SelectN [0] call:(StaticLECall ___))) x mem:(SelectN [1] call))
&& isConstZero(x)
&& isSameCall(call.Aux, "runtime.newobject")
&& isMalloc(call.Aux)
=> mem
(NilCheck ptr:(SelectN [0] call:(StaticLECall _ _)) _)
&& isSameCall(call.Aux, "runtime.newobject")
(NilCheck ptr:(SelectN [0] call:(StaticLECall ___)) _)
&& isMalloc(call.Aux)
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
=> ptr
(NilCheck ptr:(OffPtr (SelectN [0] call:(StaticLECall _ _))) _)
&& isSameCall(call.Aux, "runtime.newobject")
(NilCheck ptr:(OffPtr (SelectN [0] call:(StaticLECall ___))) _)
&& isMalloc(call.Aux)
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
=> ptr


@@ -456,6 +456,26 @@ func isSameCall(aux Aux, name string) bool {
return fn != nil && fn.String() == name
}
func isMalloc(aux Aux) bool {
return isNewObject(aux) || isSpecializedMalloc(aux)
}
func isNewObject(aux Aux) bool {
fn := aux.(*AuxCall).Fn
return fn != nil && fn.String() == "runtime.newobject"
}
func isSpecializedMalloc(aux Aux) bool {
fn := aux.(*AuxCall).Fn
if fn == nil {
return false
}
name := fn.String()
return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
strings.HasPrefix(name, "runtime.mallocTiny")
}
// canLoadUnaligned reports if the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
return c.ctxt.Arch.Alignment == 1

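Aside (illustration, not part of the CL): generalizing the rules above
from isSameCall(call.Aux, "runtime.newobject") to isMalloc means the
dead-store and nil-check eliminations keep firing after an allocation
has been lowered to one of the size-specialized calls. A minimal
sketch, assuming the experiment is enabled:

    package p

    type buf struct{ b [32]byte } // small, pointer-free: lowered to a specialized malloc

    func newBuf() *buf {
        p := new(buf)    // freshly allocated memory is already zeroed
        p.b = [32]byte{} // the Zero/Store rules delete this redundant store,
                         // and the NilCheck rule drops the implied nil check on p
        return p
    }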

@@ -21318,8 +21318,8 @@ func rewriteValuegeneric_OpNilCheck(v *Value) bool {
v.copyOf(ptr)
return true
}
// match: (NilCheck ptr:(SelectN [0] call:(StaticLECall _ _)) _)
// cond: isSameCall(call.Aux, "runtime.newobject") && warnRule(fe.Debug_checknil(), v, "removed nil check")
// match: (NilCheck ptr:(SelectN [0] call:(StaticLECall ___)) _)
// cond: isMalloc(call.Aux) && warnRule(fe.Debug_checknil(), v, "removed nil check")
// result: ptr
for {
ptr := v_0
@@ -21327,14 +21327,17 @@ func rewriteValuegeneric_OpNilCheck(v *Value) bool {
break
}
call := ptr.Args[0]
if call.Op != OpStaticLECall || len(call.Args) != 2 || !(isSameCall(call.Aux, "runtime.newobject") && warnRule(fe.Debug_checknil(), v, "removed nil check")) {
if call.Op != OpStaticLECall {
break
}
if !(isMalloc(call.Aux) && warnRule(fe.Debug_checknil(), v, "removed nil check")) {
break
}
v.copyOf(ptr)
return true
}
// match: (NilCheck ptr:(OffPtr (SelectN [0] call:(StaticLECall _ _))) _)
// cond: isSameCall(call.Aux, "runtime.newobject") && warnRule(fe.Debug_checknil(), v, "removed nil check")
// match: (NilCheck ptr:(OffPtr (SelectN [0] call:(StaticLECall ___))) _)
// cond: isMalloc(call.Aux) && warnRule(fe.Debug_checknil(), v, "removed nil check")
// result: ptr
for {
ptr := v_0
@@ -21346,7 +21349,10 @@ func rewriteValuegeneric_OpNilCheck(v *Value) bool {
break
}
call := ptr_0.Args[0]
if call.Op != OpStaticLECall || len(call.Args) != 2 || !(isSameCall(call.Aux, "runtime.newobject") && warnRule(fe.Debug_checknil(), v, "removed nil check")) {
if call.Op != OpStaticLECall {
break
}
if !(isMalloc(call.Aux) && warnRule(fe.Debug_checknil(), v, "removed nil check")) {
break
}
v.copyOf(ptr)
@@ -32463,27 +32469,27 @@ func rewriteValuegeneric_OpStore(v *Value) bool {
v.AddArg3(dst, e, mem)
return true
}
// match: (Store (SelectN [0] call:(StaticLECall _ _)) x mem:(SelectN [1] call))
// cond: isConstZero(x) && isSameCall(call.Aux, "runtime.newobject")
// match: (Store (SelectN [0] call:(StaticLECall ___)) x mem:(SelectN [1] call))
// cond: isConstZero(x) && isMalloc(call.Aux)
// result: mem
for {
if v_0.Op != OpSelectN || auxIntToInt64(v_0.AuxInt) != 0 {
break
}
call := v_0.Args[0]
if call.Op != OpStaticLECall || len(call.Args) != 2 {
if call.Op != OpStaticLECall {
break
}
x := v_1
mem := v_2
if mem.Op != OpSelectN || auxIntToInt64(mem.AuxInt) != 1 || call != mem.Args[0] || !(isConstZero(x) && isSameCall(call.Aux, "runtime.newobject")) {
if mem.Op != OpSelectN || auxIntToInt64(mem.AuxInt) != 1 || call != mem.Args[0] || !(isConstZero(x) && isMalloc(call.Aux)) {
break
}
v.copyOf(mem)
return true
}
// match: (Store (OffPtr (SelectN [0] call:(StaticLECall _ _))) x mem:(SelectN [1] call))
// cond: isConstZero(x) && isSameCall(call.Aux, "runtime.newobject")
// match: (Store (OffPtr (SelectN [0] call:(StaticLECall ___))) x mem:(SelectN [1] call))
// cond: isConstZero(x) && isMalloc(call.Aux)
// result: mem
for {
if v_0.Op != OpOffPtr {
@@ -32494,12 +32500,12 @@ func rewriteValuegeneric_OpStore(v *Value) bool {
break
}
call := v_0_0.Args[0]
if call.Op != OpStaticLECall || len(call.Args) != 2 {
if call.Op != OpStaticLECall {
break
}
x := v_1
mem := v_2
if mem.Op != OpSelectN || auxIntToInt64(mem.AuxInt) != 1 || call != mem.Args[0] || !(isConstZero(x) && isSameCall(call.Aux, "runtime.newobject")) {
if mem.Op != OpSelectN || auxIntToInt64(mem.AuxInt) != 1 || call != mem.Args[0] || !(isConstZero(x) && isMalloc(call.Aux)) {
break
}
v.copyOf(mem)
@@ -36842,19 +36848,19 @@ func rewriteValuegeneric_OpZero(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call))
// cond: isSameCall(call.Aux, "runtime.newobject")
// match: (Zero (SelectN [0] call:(StaticLECall ___)) mem:(SelectN [1] call))
// cond: isMalloc(call.Aux)
// result: mem
for {
if v_0.Op != OpSelectN || auxIntToInt64(v_0.AuxInt) != 0 {
break
}
call := v_0.Args[0]
if call.Op != OpStaticLECall || len(call.Args) != 2 {
if call.Op != OpStaticLECall {
break
}
mem := v_1
if mem.Op != OpSelectN || auxIntToInt64(mem.AuxInt) != 1 || call != mem.Args[0] || !(isSameCall(call.Aux, "runtime.newobject")) {
if mem.Op != OpSelectN || auxIntToInt64(mem.AuxInt) != 1 || call != mem.Args[0] || !(isMalloc(call.Aux)) {
break
}
v.copyOf(mem)


@@ -798,7 +798,16 @@ func IsNewObject(v *Value, select1 []*Value) (mem *Value, ok bool) {
if call.Op != OpStaticCall {
return nil, false
}
if !isSameCall(call.Aux, "runtime.newobject") {
// Check for newobject calls, or for newobject calls that have been transformed into size-specialized malloc calls.
// Calls whose return type is unsafe.Pointer may have been produced by flushPendingHeapAllocations
// in the SSA generator, so they may not originally have been newobject calls.
var numParameters int64
switch {
case isNewObject(call.Aux):
numParameters = 1
case isSpecializedMalloc(call.Aux) && !v.Type.IsUnsafePtr():
numParameters = 3
default:
return nil, false
}
if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
@@ -813,7 +822,7 @@ func IsNewObject(v *Value, select1 []*Value) (mem *Value, ok bool) {
if v.Args[0].Args[0].Op != OpSP {
return nil, false
}
if v.Args[0].AuxInt != c.ctxt.Arch.FixedFrameSize+c.RegSize { // offset of return value
if v.Args[0].AuxInt != c.ctxt.Arch.FixedFrameSize+numParameters*c.RegSize { // offset of return value
return nil, false
}
return mem, true


@@ -12,6 +12,7 @@ import (
"go/constant"
"html"
"internal/buildcfg"
"internal/runtime/gc"
"os"
"path/filepath"
"slices"
@@ -124,6 +125,15 @@ func InitConfig() {
ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded")
ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice")
ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch")
for i := 1; i < len(ir.Syms.MallocGCSmallNoScan); i++ {
ir.Syms.MallocGCSmallNoScan[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallNoScanSC%d", i))
}
for i := 1; i < len(ir.Syms.MallocGCSmallScanNoHeader); i++ {
ir.Syms.MallocGCSmallScanNoHeader[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", i))
}
for i := 1; i < len(ir.Syms.MallocGCTiny); i++ {
ir.Syms.MallocGCTiny[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocTiny%d", i))
}
ir.Syms.MallocGC = typecheck.LookupRuntimeFunc("mallocgc")
ir.Syms.Memmove = typecheck.LookupRuntimeFunc("memmove")
ir.Syms.Msanread = typecheck.LookupRuntimeFunc("msanread")
@@ -690,7 +700,7 @@ func allocAlign(t *types.Type) int64 {
func (s *state) newHeapaddr(n *ir.Name) {
size := allocSize(n.Type())
if n.Type().HasPointers() || size >= maxAggregatedHeapAllocation || size == 0 {
s.setHeapaddr(n.Pos(), n, s.newObject(n.Type(), nil))
s.setHeapaddr(n.Pos(), n, s.newObject(n.Type()))
return
}
@@ -709,7 +719,7 @@ func (s *state) newHeapaddr(n *ir.Name) {
// Make an allocation, but the type being allocated is just
// the first pending object. We will come back and update it
// later if needed.
allocCall = s.newObject(n.Type(), nil)
allocCall = s.newObjectNonSpecialized(n.Type(), nil)
} else {
allocCall = s.pendingHeapAllocations[0].Args[0]
}
@@ -762,7 +772,11 @@ func (s *state) flushPendingHeapAllocations() {
s.constBool(true), // needZero TODO: false is ok?
call.Args[1], // memory
}
call.Aux = ssa.StaticAuxCall(ir.Syms.MallocGC, s.f.ABIDefault.ABIAnalyzeTypes(
mallocSym := ir.Syms.MallocGC
if specialMallocSym := s.specializedMallocSym(size, false); specialMallocSym != nil {
mallocSym = specialMallocSym
}
call.Aux = ssa.StaticAuxCall(mallocSym, s.f.ABIDefault.ABIAnalyzeTypes(
[]*types.Type{args[0].Type, args[1].Type, args[2].Type},
[]*types.Type{types.Types[types.TUNSAFEPTR]},
))
@@ -774,6 +788,43 @@ func (s *state) flushPendingHeapAllocations() {
ptr.Type = types.Types[types.TUNSAFEPTR]
}
func (s *state) specializedMallocSym(size int64, hasPointers bool) *obj.LSym {
if !s.sizeSpecializedMallocEnabled() {
return nil
}
ptrSize := s.config.PtrSize
ptrBits := ptrSize * 8
minSizeForMallocHeader := ptrSize * ptrBits
heapBitsInSpan := size <= minSizeForMallocHeader
if !heapBitsInSpan {
return nil
}
divRoundUp := func(n, a uintptr) uintptr { return (n + a - 1) / a }
sizeClass := gc.SizeToSizeClass8[divRoundUp(uintptr(size), gc.SmallSizeDiv)]
if hasPointers {
return ir.Syms.MallocGCSmallScanNoHeader[sizeClass]
}
if size < gc.TinySize {
return ir.Syms.MallocGCTiny[size]
}
return ir.Syms.MallocGCSmallNoScan[sizeClass]
}
func (s *state) sizeSpecializedMallocEnabled() bool {
if base.Flag.CompilingRuntime {
// The compiler forces the values of the asan, msan, and race flags to false if
// we're compiling the runtime, so we lose the information about whether we're
// building in asan, msan, or race mode. Because the specialized functions don't
// work in that mode, just turn it off in that case.
// TODO(matloob): Save the information about whether the flags were passed in
// originally so we can turn off size-specialized malloc in that case instead
// of using Instrumenting below. Then we can remove this condition.
return false
}
return buildcfg.Experiment.SizeSpecializedMalloc && !base.Flag.Cfg.Instrumenting
}
// setHeapaddr allocates a new PAUTO variable to store ptr (which must be non-nil)
// and then sets it as n's heap address.
func (s *state) setHeapaddr(pos src.XPos, n *ir.Name, ptr *ssa.Value) {
@@ -796,7 +847,24 @@ func (s *state) setHeapaddr(pos src.XPos, n *ir.Name, ptr *ssa.Value) {
}
// newObject returns an SSA value denoting new(typ).
func (s *state) newObject(typ *types.Type, rtype *ssa.Value) *ssa.Value {
func (s *state) newObject(typ *types.Type) *ssa.Value {
if typ.Size() == 0 {
return s.newValue1A(ssa.OpAddr, types.NewPtr(typ), ir.Syms.Zerobase, s.sb)
}
rtype := s.reflectType(typ)
if specialMallocSym := s.specializedMallocSym(typ.Size(), typ.HasPointers()); specialMallocSym != nil {
return s.rtcall(specialMallocSym, true, []*types.Type{types.NewPtr(typ)},
s.constInt(types.Types[types.TUINTPTR], typ.Size()),
rtype,
s.constBool(true),
)[0]
}
return s.rtcall(ir.Syms.Newobject, true, []*types.Type{types.NewPtr(typ)}, rtype)[0]
}
// newObjectNonSpecialized returns an SSA value denoting new(typ). It does
// not produce size-specialized malloc functions.
func (s *state) newObjectNonSpecialized(typ *types.Type, rtype *ssa.Value) *ssa.Value {
if typ.Size() == 0 {
return s.newValue1A(ssa.OpAddr, types.NewPtr(typ), ir.Syms.Zerobase, s.sb)
}
@@ -3594,11 +3662,10 @@ func (s *state) exprCheckPtr(n ir.Node, checkPtrOK bool) *ssa.Value {
case ir.ONEW:
n := n.(*ir.UnaryExpr)
var rtype *ssa.Value
if x, ok := n.X.(*ir.DynamicType); ok && x.Op() == ir.ODYNAMICTYPE {
rtype = s.expr(x.RType)
return s.newObjectNonSpecialized(n.Type().Elem(), s.expr(x.RType))
}
return s.newObject(n.Type().Elem(), rtype)
return s.newObject(n.Type().Elem())
case ir.OUNSAFEADD:
n := n.(*ir.BinaryExpr)


@@ -90,6 +90,7 @@ var bootstrapDirs = []string{
"internal/platform",
"internal/profile",
"internal/race",
"internal/runtime/gc",
"internal/saferio",
"internal/syscall/unix",
"internal/types/errors",


@@ -49,7 +49,7 @@ const (
// desired maximum number of frames after expansion.
// This should be at least as large as the largest skip value
// used for profiling; otherwise stacks may be truncated inconsistently
maxSkip = 6
maxSkip = 8
// maxProfStackDepth is the highest valid value for debug.profstackdepth.
// It's used for the bucket.stk func.
@@ -444,7 +444,7 @@ func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
}
// Only use the part of mp.profStack we need and ignore the extra space
// reserved for delayed inline expansion with frame pointer unwinding.
nstk := callers(5, mp.profStack[:debug.profstackdepth])
nstk := callers(3, mp.profStack[:debug.profstackdepth+2])
index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
b := stkbucket(memProfile, size, mp.profStack[:nstk], true)


@@ -97,25 +97,25 @@ func TestMemoryProfiler(t *testing.T) {
legacy string
}{{
stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+( 0x[0-9,a-f]+ 0x[0-9,a-f]+)?
# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test\.go:48
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test\.go:87
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:25
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:84
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:31
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:85
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:35
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:86
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),


@@ -2585,7 +2585,7 @@ func TestProfilerStackDepth(t *testing.T) {
t.Errorf("want stack depth = %d, got %d", depth, len(stk))
}
if rootFn, wantFn := stk[depth-1], "runtime/pprof.produceProfileEvents"; rootFn != wantFn {
if rootFn, wantFn := stk[depth-1], "runtime/pprof.allocDeep"; rootFn != wantFn {
t.Errorf("want stack stack root %s, got %v", wantFn, rootFn)
}
}
@@ -2660,7 +2660,7 @@ func goroutineDeep(t *testing.T, n int) {
// guaranteed to have exactly the desired depth with produceProfileEvents as
// their root frame which is expected by TestProfilerStackDepth.
func produceProfileEvents(t *testing.T, depth int) {
allocDeep(depth - 1) // -1 for produceProfileEvents, **
allocDeep(depth + 1) // +1 for produceProfileEvents, **
blockChanDeep(t, depth-2) // -2 for produceProfileEvents, **, chanrecv1
blockMutexDeep(t, depth-2) // -2 for produceProfileEvents, **, Unlock
memSink = nil


@@ -23,7 +23,7 @@ func CountBytes(s []byte) int {
func ToByteSlice() []byte { // Issue #24698
// amd64:`LEAQ\ttype:\[3\]uint8`
// amd64:`CALL\truntime\.newobject`
// amd64:`CALL\truntime\.mallocTiny3`
// amd64:-`.*runtime.stringtoslicebyte`
return []byte("foo")
}


@@ -19,7 +19,7 @@ type T struct{ M string }
var b bool
func f1(q *Q, xx []byte) interface{} { // ERROR "live at call to newobject: xx$" "live at entry to f1: xx$"
func f1(q *Q, xx []byte) interface{} { // ERROR "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: xx$" "live at entry to f1: xx$"
// xx was copied from the stack to the heap on the previous line:
// xx was live for the first two prints but then it switched to &xx
// being live. We should not see plain xx again.
@@ -36,7 +36,7 @@ func f1(q *Q, xx []byte) interface{} { // ERROR "live at call to newobject: xx$"
//go:noinline
func f2(d []byte, n int) (odata, res []byte, e interface{}) { // ERROR "live at entry to f2: d$"
if n > len(d) {
return d, nil, &T{M: "hello"} // ERROR "live at call to newobject: d"
return d, nil, &T{M: "hello"} // ERROR "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: d"
}
res = d[:n]
odata = d[n:]


@@ -48,22 +48,23 @@ func testInterleavedAllocations() error {
const iters = 50000
// Sizes of the allocations performed by each experiment.
frames := []string{"main.allocInterleaved1", "main.allocInterleaved2", "main.allocInterleaved3"}
leafFrame := "main.allocInterleaved"
// Pass if at least one of three experiments has no errors. Use a separate
// function for each experiment to identify each experiment in the profile.
allocInterleaved1(iters)
if checkAllocations(getMemProfileRecords(), frames[0:1], iters, allocInterleavedSizes) == nil {
if checkAllocations(getMemProfileRecords(), leafFrame, frames[0:1], iters, allocInterleavedSizes) == nil {
// Passed on first try, report no error.
return nil
}
allocInterleaved2(iters)
if checkAllocations(getMemProfileRecords(), frames[0:2], iters, allocInterleavedSizes) == nil {
if checkAllocations(getMemProfileRecords(), leafFrame, frames[0:2], iters, allocInterleavedSizes) == nil {
// Passed on second try, report no error.
return nil
}
allocInterleaved3(iters)
// If it fails a third time, we may be onto something.
return checkAllocations(getMemProfileRecords(), frames[0:3], iters, allocInterleavedSizes)
return checkAllocations(getMemProfileRecords(), leafFrame, frames[0:3], iters, allocInterleavedSizes)
}
var allocInterleavedSizes = []int64{17 * 1024, 1024, 18 * 1024, 512, 16 * 1024, 256}
@@ -108,22 +109,23 @@ func testSmallAllocations() error {
// Sizes of the allocations performed by each experiment.
sizes := []int64{1024, 512, 256}
frames := []string{"main.allocSmall1", "main.allocSmall2", "main.allocSmall3"}
leafFrame := "main.allocSmall"
// Pass if at least one of three experiments has no errors. Use a separate
// function for each experiment to identify each experiment in the profile.
allocSmall1(iters)
if checkAllocations(getMemProfileRecords(), frames[0:1], iters, sizes) == nil {
if checkAllocations(getMemProfileRecords(), leafFrame, frames[0:1], iters, sizes) == nil {
// Passed on first try, report no error.
return nil
}
allocSmall2(iters)
if checkAllocations(getMemProfileRecords(), frames[0:2], iters, sizes) == nil {
if checkAllocations(getMemProfileRecords(), leafFrame, frames[0:2], iters, sizes) == nil {
// Passed on second try, report no error.
return nil
}
allocSmall3(iters)
// If it fails a third time, we may be onto something.
return checkAllocations(getMemProfileRecords(), frames[0:3], iters, sizes)
return checkAllocations(getMemProfileRecords(), leafFrame, frames[0:3], iters, sizes)
}
// allocSmall performs only small allocations for sanity testing.
@@ -161,21 +163,21 @@ func allocSmall3(n int) {
// Look only at samples that include the named frames, and group the
// allocations by their line number. All these allocations are done from
// the same leaf function, so their line numbers are the same.
func checkAllocations(records []runtime.MemProfileRecord, frames []string, count int64, size []int64) error {
func checkAllocations(records []runtime.MemProfileRecord, leafFrame string, frames []string, count int64, size []int64) error {
objectsPerLine := map[int][]int64{}
bytesPerLine := map[int][]int64{}
totalCount := []int64{}
// Compute the line number of the first allocation. All the
// allocations are from the same leaf, so pick the first one.
var firstLine int
for ln := range allocObjects(records, frames[0]) {
for ln := range allocObjects(records, leafFrame, frames[0]) {
if firstLine == 0 || firstLine > ln {
firstLine = ln
}
}
for _, frame := range frames {
var objectCount int64
a := allocObjects(records, frame)
a := allocObjects(records, leafFrame, frame)
for s := range size {
// Allocations of size size[s] should be on line firstLine + s.
ln := firstLine + s
@@ -258,7 +260,7 @@ type allocStat struct {
// allocObjects examines the profile records for samples including the
// named function and returns the allocation stats aggregated by
// source line number of the allocation (at the leaf frame).
func allocObjects(records []runtime.MemProfileRecord, function string) map[int]allocStat {
func allocObjects(records []runtime.MemProfileRecord, leafFrame, function string) map[int]allocStat {
a := make(map[int]allocStat)
for _, r := range records {
var pcs []uintptr
@@ -273,7 +275,7 @@ func allocObjects(records []runtime.MemProfileRecord, function string) map[int]a
for {
frame, more := frames.Next()
name := frame.Function
if line == 0 {
if name == leafFrame && line == 0 {
line = frame.Line
}
if name == function {


@@ -467,9 +467,9 @@ func f27defer(b bool) {
func f27go(b bool) {
x := 0
if b {
go call27(func() { x++ }) // ERROR "live at call to newobject: &x$" "live at call to newobject: &x .autotmp_[0-9]+$" "live at call to newproc: &x$" // allocate two closures, the func literal, and the wrapper for go
go call27(func() { x++ }) // ERROR "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: &x$" "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: &x .autotmp_[0-9]+$" "live at call to newproc: &x$" // allocate two closures, the func literal, and the wrapper for go
}
go call27(func() { x++ }) // ERROR "live at call to newobject: &x$" "live at call to newobject: .autotmp_[0-9]+$" // allocate two closures, the func literal, and the wrapper for go
go call27(func() { x++ }) // ERROR "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: &x$" "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: .autotmp_[0-9]+$" // allocate two closures, the func literal, and the wrapper for go
printnl()
}
@@ -538,7 +538,7 @@ func f31(b1, b2, b3 bool) {
g31(g18()) // ERROR "stack object .autotmp_[0-9]+ \[2\]string$"
}
if b2 {
h31(g18()) // ERROR "live at call to convT: .autotmp_[0-9]+$" "live at call to newobject: .autotmp_[0-9]+$"
h31(g18()) // ERROR "live at call to convT: .autotmp_[0-9]+$" "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: .autotmp_[0-9]+$"
}
if b3 {
panic(g18())
@@ -665,14 +665,14 @@ func f39a() (x []int) {
func f39b() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to newobject: x$"
x[0] = new(int) // ERROR "live at call to mallocTiny[48]: x$"
printnl() // ERROR "live at call to printnl: x$"
return x
}
func f39c() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to newobject: x$"
x[0] = new(int) // ERROR "live at call to mallocTiny[48]: x$"
printnl() // ERROR "live at call to printnl: x$"
return
}


@@ -465,9 +465,9 @@ func f27defer(b bool) {
func f27go(b bool) {
x := 0
if b {
go call27(func() { x++ }) // ERROR "live at call to newobject: &x$" "live at call to newobject: &x .autotmp_[0-9]+$" "live at call to newproc: &x$" // allocate two closures, the func literal, and the wrapper for go
go call27(func() { x++ }) // ERROR "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: &x$" "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: &x .autotmp_[0-9]+$" "live at call to newproc: &x$" // allocate two closures, the func literal, and the wrapper for go
}
go call27(func() { x++ }) // ERROR "live at call to newobject: &x$" "live at call to newobject: .autotmp_[0-9]+$" // allocate two closures, the func literal, and the wrapper for go
go call27(func() { x++ }) // ERROR "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: &x$" "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: .autotmp_[0-9]+$" // allocate two closures, the func literal, and the wrapper for go
printnl()
}
@@ -536,7 +536,7 @@ func f31(b1, b2, b3 bool) {
g31(g18()) // ERROR "stack object .autotmp_[0-9]+ \[2\]string$"
}
if b2 {
h31(g18()) // ERROR "live at call to convT: .autotmp_[0-9]+$" "live at call to newobject: .autotmp_[0-9]+$"
h31(g18()) // ERROR "live at call to convT: .autotmp_[0-9]+$" "live at call to mallocgcSmallScanNoHeaderSC[0-9]+: .autotmp_[0-9]+$"
}
if b3 {
panic(g18())
@@ -663,14 +663,14 @@ func f39a() (x []int) {
func f39b() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to newobject: x$"
x[0] = new(int) // ERROR "live at call to mallocTiny[48]: x$"
printnl() // ERROR "live at call to printnl: x$"
return x
}
func f39c() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to newobject: x$"
x[0] = new(int) // ERROR "live at call to mallocTiny[48]: x$"
printnl() // ERROR "live at call to printnl: x$"
return
}


@@ -33,8 +33,8 @@ func (T) M1(a uintptr) {} // ERROR "escaping uintptr"
func (T) M2(a ...uintptr) {} // ERROR "escaping ...uintptr"
func TestF1() {
var t int // ERROR "moved to heap"
F1(uintptr(unsafe.Pointer(&t))) // ERROR "live at call to F1: .?autotmp" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
var t int // ERROR "moved to heap"
F1(uintptr(unsafe.Pointer(&t))) // ERROR "live at call to F1: .?autotmp" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
}
func TestF3() {
@@ -49,17 +49,17 @@ func TestM1() {
}
func TestF2() {
var v int // ERROR "moved to heap"
F2(0, 1, uintptr(unsafe.Pointer(&v)), 2) // ERROR "live at call to newobject: .?autotmp" "live at call to F2: .?autotmp" "escapes to heap" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
var v int // ERROR "moved to heap"
F2(0, 1, uintptr(unsafe.Pointer(&v)), 2) // ERROR "live at call to mallocgcSmallNoScanSC[0-9]+: .?autotmp" "live at call to F2: .?autotmp" "escapes to heap" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
}
func TestF4() {
var v2 int // ERROR "moved to heap"
F4(0, 1, uintptr(unsafe.Pointer(&v2)), 2) // ERROR "live at call to newobject: .?autotmp" "live at call to F4: .?autotmp" "escapes to heap" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
F4(0, 1, uintptr(unsafe.Pointer(&v2)), 2) // ERROR "live at call to mallocgcSmallNoScanSC[0-9]+: .?autotmp" "live at call to F4: .?autotmp" "escapes to heap" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
}
func TestM2() {
var t T
var v int // ERROR "moved to heap"
t.M2(0, 1, uintptr(unsafe.Pointer(&v)), 2) // ERROR "live at call to newobject: .?autotmp" "live at call to T.M2: .?autotmp" "escapes to heap" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
t.M2(0, 1, uintptr(unsafe.Pointer(&v)), 2) // ERROR "live at call to mallocgcSmallNoScanSC[0-9]+: .?autotmp" "live at call to T.M2: .?autotmp" "escapes to heap" "stack object .autotmp_[0-9]+ unsafe.Pointer$"
}