mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
pprof: add goroutine blocking profiling
The profiler collects goroutine blocking information similar to Google Perf Tools. You may see an example of the profile (converted to svg) attached to http://code.google.com/p/go/issues/detail?id=3946 The public API changes are: +pkg runtime, func BlockProfile([]BlockProfileRecord) (int, bool) +pkg runtime, func SetBlockProfileRate(int) +pkg runtime, method (*BlockProfileRecord) Stack() []uintptr +pkg runtime, type BlockProfileRecord struct +pkg runtime, type BlockProfileRecord struct, Count int64 +pkg runtime, type BlockProfileRecord struct, Cycles int64 +pkg runtime, type BlockProfileRecord struct, embedded StackRecord R=rsc, dave, minux.ma, r CC=gobot, golang-dev, r, remyoudompheng https://golang.org/cl/6443115
This commit is contained in:
parent
ebb0e5db75
commit
4cc7bf326a
12 changed files with 350 additions and 52 deletions
|
|
@ -112,6 +112,18 @@ directory containing the package sources, has its own flags:
|
||||||
garbage collector, provided the test can run in the available
|
garbage collector, provided the test can run in the available
|
||||||
memory without garbage collection.
|
memory without garbage collection.
|
||||||
|
|
||||||
|
-test.blockprofile block.out
|
||||||
|
Write a goroutine blocking profile to the specified file
|
||||||
|
when all tests are complete.
|
||||||
|
|
||||||
|
-test.blockprofilerate n
|
||||||
|
Control the detail provided in goroutine blocking profiles by setting
|
||||||
|
runtime.BlockProfileRate to n. See 'godoc runtime BlockProfileRate'.
|
||||||
|
The profiler aims to sample, on average, one blocking event every
|
||||||
|
n nanoseconds the program spends blocked. By default,
|
||||||
|
if -test.blockprofile is set without this flag, all blocking events
|
||||||
|
are recorded, equivalent to -test.blockprofilerate=1.
|
||||||
|
|
||||||
-test.parallel n
|
-test.parallel n
|
||||||
Allow parallel execution of test functions that call t.Parallel.
|
Allow parallel execution of test functions that call t.Parallel.
|
||||||
The value of this flag is the maximum number of tests to run
|
The value of this flag is the maximum number of tests to run
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,8 @@ var usageMessage = `Usage of go test:
|
||||||
-cpuprofile="": passes -test.cpuprofile to test
|
-cpuprofile="": passes -test.cpuprofile to test
|
||||||
-memprofile="": passes -test.memprofile to test
|
-memprofile="": passes -test.memprofile to test
|
||||||
-memprofilerate=0: passes -test.memprofilerate to test
|
-memprofilerate=0: passes -test.memprofilerate to test
|
||||||
|
-blockprofile="": pases -test.blockprofile to test
|
||||||
|
-blockprofilerate=0: passes -test.blockprofilerate to test
|
||||||
-parallel=0: passes -test.parallel to test
|
-parallel=0: passes -test.parallel to test
|
||||||
-run="": passes -test.run to test
|
-run="": passes -test.run to test
|
||||||
-short=false: passes -test.short to test
|
-short=false: passes -test.short to test
|
||||||
|
|
@ -82,6 +84,8 @@ var testFlagDefn = []*testFlagSpec{
|
||||||
{name: "cpuprofile", passToTest: true},
|
{name: "cpuprofile", passToTest: true},
|
||||||
{name: "memprofile", passToTest: true},
|
{name: "memprofile", passToTest: true},
|
||||||
{name: "memprofilerate", passToTest: true},
|
{name: "memprofilerate", passToTest: true},
|
||||||
|
{name: "blockprofile", passToTest: true},
|
||||||
|
{name: "blockprofilerate", passToTest: true},
|
||||||
{name: "parallel", passToTest: true},
|
{name: "parallel", passToTest: true},
|
||||||
{name: "run", passToTest: true},
|
{name: "run", passToTest: true},
|
||||||
{name: "short", boolVar: new(bool), passToTest: true},
|
{name: "short", boolVar: new(bool), passToTest: true},
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,10 @@
|
||||||
//
|
//
|
||||||
// go tool pprof http://localhost:6060/debug/pprof/profile
|
// go tool pprof http://localhost:6060/debug/pprof/profile
|
||||||
//
|
//
|
||||||
|
// Or to look at the goroutine blocking profile:
|
||||||
|
//
|
||||||
|
// go tool pprof http://localhost:6060/debug/pprof/block
|
||||||
|
//
|
||||||
// Or to view all available profiles:
|
// Or to view all available profiles:
|
||||||
//
|
//
|
||||||
// go tool pprof http://localhost:6060/debug/pprof/
|
// go tool pprof http://localhost:6060/debug/pprof/
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ struct SudoG
|
||||||
G* g; // g and selgen constitute
|
G* g; // g and selgen constitute
|
||||||
uint32 selgen; // a weak pointer to g
|
uint32 selgen; // a weak pointer to g
|
||||||
SudoG* link;
|
SudoG* link;
|
||||||
|
int64 releasetime;
|
||||||
byte* elem; // data element
|
byte* elem; // data element
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -154,6 +155,7 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
|
||||||
SudoG *sg;
|
SudoG *sg;
|
||||||
SudoG mysg;
|
SudoG mysg;
|
||||||
G* gp;
|
G* gp;
|
||||||
|
int64 t0;
|
||||||
|
|
||||||
if(c == nil) {
|
if(c == nil) {
|
||||||
USED(t);
|
USED(t);
|
||||||
|
|
@ -174,6 +176,13 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
|
||||||
runtime·prints("\n");
|
runtime·prints("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t0 = 0;
|
||||||
|
mysg.releasetime = 0;
|
||||||
|
if(runtime·blockprofilerate > 0) {
|
||||||
|
t0 = runtime·cputicks();
|
||||||
|
mysg.releasetime = -1;
|
||||||
|
}
|
||||||
|
|
||||||
runtime·lock(c);
|
runtime·lock(c);
|
||||||
if(c->closed)
|
if(c->closed)
|
||||||
goto closed;
|
goto closed;
|
||||||
|
|
@ -189,6 +198,8 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
|
||||||
gp->param = sg;
|
gp->param = sg;
|
||||||
if(sg->elem != nil)
|
if(sg->elem != nil)
|
||||||
c->elemalg->copy(c->elemsize, sg->elem, ep);
|
c->elemalg->copy(c->elemsize, sg->elem, ep);
|
||||||
|
if(sg->releasetime)
|
||||||
|
sg->releasetime = runtime·cputicks();
|
||||||
runtime·ready(gp);
|
runtime·ready(gp);
|
||||||
|
|
||||||
if(pres != nil)
|
if(pres != nil)
|
||||||
|
|
@ -216,6 +227,9 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres)
|
||||||
goto closed;
|
goto closed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(mysg.releasetime > 0)
|
||||||
|
runtime·blockevent(mysg.releasetime - t0, 2);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
asynch:
|
asynch:
|
||||||
|
|
@ -246,11 +260,15 @@ asynch:
|
||||||
if(sg != nil) {
|
if(sg != nil) {
|
||||||
gp = sg->g;
|
gp = sg->g;
|
||||||
runtime·unlock(c);
|
runtime·unlock(c);
|
||||||
|
if(sg->releasetime)
|
||||||
|
sg->releasetime = runtime·cputicks();
|
||||||
runtime·ready(gp);
|
runtime·ready(gp);
|
||||||
} else
|
} else
|
||||||
runtime·unlock(c);
|
runtime·unlock(c);
|
||||||
if(pres != nil)
|
if(pres != nil)
|
||||||
*pres = true;
|
*pres = true;
|
||||||
|
if(mysg.releasetime > 0)
|
||||||
|
runtime·blockevent(mysg.releasetime - t0, 2);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
closed:
|
closed:
|
||||||
|
|
@ -265,6 +283,7 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
|
||||||
SudoG *sg;
|
SudoG *sg;
|
||||||
SudoG mysg;
|
SudoG mysg;
|
||||||
G *gp;
|
G *gp;
|
||||||
|
int64 t0;
|
||||||
|
|
||||||
if(runtime·gcwaiting)
|
if(runtime·gcwaiting)
|
||||||
runtime·gosched();
|
runtime·gosched();
|
||||||
|
|
@ -282,6 +301,13 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
|
||||||
return; // not reached
|
return; // not reached
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t0 = 0;
|
||||||
|
mysg.releasetime = 0;
|
||||||
|
if(runtime·blockprofilerate > 0) {
|
||||||
|
t0 = runtime·cputicks();
|
||||||
|
mysg.releasetime = -1;
|
||||||
|
}
|
||||||
|
|
||||||
runtime·lock(c);
|
runtime·lock(c);
|
||||||
if(c->dataqsiz > 0)
|
if(c->dataqsiz > 0)
|
||||||
goto asynch;
|
goto asynch;
|
||||||
|
|
@ -297,6 +323,8 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
|
||||||
c->elemalg->copy(c->elemsize, ep, sg->elem);
|
c->elemalg->copy(c->elemsize, ep, sg->elem);
|
||||||
gp = sg->g;
|
gp = sg->g;
|
||||||
gp->param = sg;
|
gp->param = sg;
|
||||||
|
if(sg->releasetime)
|
||||||
|
sg->releasetime = runtime·cputicks();
|
||||||
runtime·ready(gp);
|
runtime·ready(gp);
|
||||||
|
|
||||||
if(selected != nil)
|
if(selected != nil)
|
||||||
|
|
@ -328,6 +356,8 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive
|
||||||
|
|
||||||
if(received != nil)
|
if(received != nil)
|
||||||
*received = true;
|
*received = true;
|
||||||
|
if(mysg.releasetime > 0)
|
||||||
|
runtime·blockevent(mysg.releasetime - t0, 2);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
asynch:
|
asynch:
|
||||||
|
|
@ -362,6 +392,8 @@ asynch:
|
||||||
if(sg != nil) {
|
if(sg != nil) {
|
||||||
gp = sg->g;
|
gp = sg->g;
|
||||||
runtime·unlock(c);
|
runtime·unlock(c);
|
||||||
|
if(sg->releasetime)
|
||||||
|
sg->releasetime = runtime·cputicks();
|
||||||
runtime·ready(gp);
|
runtime·ready(gp);
|
||||||
} else
|
} else
|
||||||
runtime·unlock(c);
|
runtime·unlock(c);
|
||||||
|
|
@ -370,6 +402,8 @@ asynch:
|
||||||
*selected = true;
|
*selected = true;
|
||||||
if(received != nil)
|
if(received != nil)
|
||||||
*received = true;
|
*received = true;
|
||||||
|
if(mysg.releasetime > 0)
|
||||||
|
runtime·blockevent(mysg.releasetime - t0, 2);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
closed:
|
closed:
|
||||||
|
|
@ -380,6 +414,8 @@ closed:
|
||||||
if(received != nil)
|
if(received != nil)
|
||||||
*received = false;
|
*received = false;
|
||||||
runtime·unlock(c);
|
runtime·unlock(c);
|
||||||
|
if(mysg.releasetime > 0)
|
||||||
|
runtime·blockevent(mysg.releasetime - t0, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// chansend1(hchan *chan any, elem any);
|
// chansend1(hchan *chan any, elem any);
|
||||||
|
|
|
||||||
|
|
@ -138,6 +138,31 @@ func CPUProfile() []byte
|
||||||
// SetCPUProfileRate directly.
|
// SetCPUProfileRate directly.
|
||||||
func SetCPUProfileRate(hz int)
|
func SetCPUProfileRate(hz int)
|
||||||
|
|
||||||
|
// SetBlockProfileRate controls the fraction of goroutine blocking events
|
||||||
|
// that are reported in the blocking profile. The profiler aims to sample
|
||||||
|
// an average of one blocking event per rate nanoseconds spent blocked.
|
||||||
|
//
|
||||||
|
// To include every blocking event in the profile, pass rate = 1.
|
||||||
|
// To turn off profiling entirely, pass rate <= 0.
|
||||||
|
func SetBlockProfileRate(rate int)
|
||||||
|
|
||||||
|
// BlockProfileRecord describes blocking events originated
|
||||||
|
// at a particular call sequence (stack trace).
|
||||||
|
type BlockProfileRecord struct {
|
||||||
|
Count int64
|
||||||
|
Cycles int64
|
||||||
|
StackRecord
|
||||||
|
}
|
||||||
|
|
||||||
|
// BlockProfile returns n, the number of records in the current blocking profile.
|
||||||
|
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
|
||||||
|
// If len(p) < n, BlockProfile does not change p and returns n, false.
|
||||||
|
//
|
||||||
|
// Most clients should use the runtime/pprof package or
|
||||||
|
// the testing package's -test.blockprofile flag instead
|
||||||
|
// of calling BlockProfile directly.
|
||||||
|
func BlockProfile(p []BlockProfileRecord) (n int, ok bool)
|
||||||
|
|
||||||
// Stack formats a stack trace of the calling goroutine into buf
|
// Stack formats a stack trace of the calling goroutine into buf
|
||||||
// and returns the number of bytes written to buf.
|
// and returns the number of bytes written to buf.
|
||||||
// If all is true, Stack formats stack traces of all other goroutines
|
// If all is true, Stack formats stack traces of all other goroutines
|
||||||
|
|
|
||||||
|
|
@ -15,21 +15,35 @@ package runtime
|
||||||
// NOTE(rsc): Everything here could use cas if contention became an issue.
|
// NOTE(rsc): Everything here could use cas if contention became an issue.
|
||||||
static Lock proflock;
|
static Lock proflock;
|
||||||
|
|
||||||
// Per-call-stack allocation information.
|
enum { MProf, BProf }; // profile types
|
||||||
|
|
||||||
|
// Per-call-stack profiling information.
|
||||||
// Lookup by hashing call stack into a linked-list hash table.
|
// Lookup by hashing call stack into a linked-list hash table.
|
||||||
typedef struct Bucket Bucket;
|
typedef struct Bucket Bucket;
|
||||||
struct Bucket
|
struct Bucket
|
||||||
{
|
{
|
||||||
Bucket *next; // next in hash list
|
Bucket *next; // next in hash list
|
||||||
Bucket *allnext; // next in list of all buckets
|
Bucket *allnext; // next in list of all mbuckets/bbuckets
|
||||||
uintptr allocs;
|
int32 typ;
|
||||||
uintptr frees;
|
union
|
||||||
uintptr alloc_bytes;
|
{
|
||||||
uintptr free_bytes;
|
struct // typ == MProf
|
||||||
uintptr recent_allocs; // since last gc
|
{
|
||||||
uintptr recent_frees;
|
uintptr allocs;
|
||||||
uintptr recent_alloc_bytes;
|
uintptr frees;
|
||||||
uintptr recent_free_bytes;
|
uintptr alloc_bytes;
|
||||||
|
uintptr free_bytes;
|
||||||
|
uintptr recent_allocs; // since last gc
|
||||||
|
uintptr recent_frees;
|
||||||
|
uintptr recent_alloc_bytes;
|
||||||
|
uintptr recent_free_bytes;
|
||||||
|
};
|
||||||
|
struct // typ == BProf
|
||||||
|
{
|
||||||
|
int64 count;
|
||||||
|
int64 cycles;
|
||||||
|
};
|
||||||
|
};
|
||||||
uintptr hash;
|
uintptr hash;
|
||||||
uintptr nstk;
|
uintptr nstk;
|
||||||
uintptr stk[1];
|
uintptr stk[1];
|
||||||
|
|
@ -38,12 +52,13 @@ enum {
|
||||||
BuckHashSize = 179999,
|
BuckHashSize = 179999,
|
||||||
};
|
};
|
||||||
static Bucket **buckhash;
|
static Bucket **buckhash;
|
||||||
static Bucket *buckets;
|
static Bucket *mbuckets; // memory profile buckets
|
||||||
|
static Bucket *bbuckets; // blocking profile buckets
|
||||||
static uintptr bucketmem;
|
static uintptr bucketmem;
|
||||||
|
|
||||||
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
|
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
|
||||||
static Bucket*
|
static Bucket*
|
||||||
stkbucket(uintptr *stk, int32 nstk, bool alloc)
|
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
|
||||||
{
|
{
|
||||||
int32 i;
|
int32 i;
|
||||||
uintptr h;
|
uintptr h;
|
||||||
|
|
@ -66,7 +81,7 @@ stkbucket(uintptr *stk, int32 nstk, bool alloc)
|
||||||
|
|
||||||
i = h%BuckHashSize;
|
i = h%BuckHashSize;
|
||||||
for(b = buckhash[i]; b; b=b->next)
|
for(b = buckhash[i]; b; b=b->next)
|
||||||
if(b->hash == h && b->nstk == nstk &&
|
if(b->typ == typ && b->hash == h && b->nstk == nstk &&
|
||||||
runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
|
runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
|
||||||
return b;
|
return b;
|
||||||
|
|
||||||
|
|
@ -76,12 +91,18 @@ stkbucket(uintptr *stk, int32 nstk, bool alloc)
|
||||||
b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
|
b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
|
||||||
bucketmem += sizeof *b + nstk*sizeof stk[0];
|
bucketmem += sizeof *b + nstk*sizeof stk[0];
|
||||||
runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
|
runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
|
||||||
|
b->typ = typ;
|
||||||
b->hash = h;
|
b->hash = h;
|
||||||
b->nstk = nstk;
|
b->nstk = nstk;
|
||||||
b->next = buckhash[i];
|
b->next = buckhash[i];
|
||||||
buckhash[i] = b;
|
buckhash[i] = b;
|
||||||
b->allnext = buckets;
|
if(typ == MProf) {
|
||||||
buckets = b;
|
b->allnext = mbuckets;
|
||||||
|
mbuckets = b;
|
||||||
|
} else {
|
||||||
|
b->allnext = bbuckets;
|
||||||
|
bbuckets = b;
|
||||||
|
}
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -92,7 +113,7 @@ runtime·MProf_GC(void)
|
||||||
Bucket *b;
|
Bucket *b;
|
||||||
|
|
||||||
runtime·lock(&proflock);
|
runtime·lock(&proflock);
|
||||||
for(b=buckets; b; b=b->allnext) {
|
for(b=mbuckets; b; b=b->allnext) {
|
||||||
b->allocs += b->recent_allocs;
|
b->allocs += b->recent_allocs;
|
||||||
b->frees += b->recent_frees;
|
b->frees += b->recent_frees;
|
||||||
b->alloc_bytes += b->recent_alloc_bytes;
|
b->alloc_bytes += b->recent_alloc_bytes;
|
||||||
|
|
@ -228,7 +249,7 @@ runtime·MProf_Malloc(void *p, uintptr size)
|
||||||
m->nomemprof++;
|
m->nomemprof++;
|
||||||
nstk = runtime·callers(1, stk, 32);
|
nstk = runtime·callers(1, stk, 32);
|
||||||
runtime·lock(&proflock);
|
runtime·lock(&proflock);
|
||||||
b = stkbucket(stk, nstk, true);
|
b = stkbucket(MProf, stk, nstk, true);
|
||||||
b->recent_allocs++;
|
b->recent_allocs++;
|
||||||
b->recent_alloc_bytes += size;
|
b->recent_alloc_bytes += size;
|
||||||
setaddrbucket((uintptr)p, b);
|
setaddrbucket((uintptr)p, b);
|
||||||
|
|
@ -256,6 +277,35 @@ runtime·MProf_Free(void *p, uintptr size)
|
||||||
m->nomemprof--;
|
m->nomemprof--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int64 runtime·blockprofilerate; // in CPU ticks
|
||||||
|
|
||||||
|
void
|
||||||
|
runtime·SetBlockProfileRate(intgo rate)
|
||||||
|
{
|
||||||
|
runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
runtime·blockevent(int64 cycles, int32 skip)
|
||||||
|
{
|
||||||
|
int32 nstk;
|
||||||
|
int64 rate;
|
||||||
|
uintptr stk[32];
|
||||||
|
Bucket *b;
|
||||||
|
|
||||||
|
if(cycles <= 0)
|
||||||
|
return;
|
||||||
|
rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
|
||||||
|
if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
|
||||||
|
return;
|
||||||
|
|
||||||
|
nstk = runtime·callers(skip, stk, 32);
|
||||||
|
runtime·lock(&proflock);
|
||||||
|
b = stkbucket(BProf, stk, nstk, true);
|
||||||
|
b->count++;
|
||||||
|
b->cycles += cycles;
|
||||||
|
runtime·unlock(&proflock);
|
||||||
|
}
|
||||||
|
|
||||||
// Go interface to profile data. (Declared in extern.go)
|
// Go interface to profile data. (Declared in extern.go)
|
||||||
// Assumes Go sizeof(int) == sizeof(int32)
|
// Assumes Go sizeof(int) == sizeof(int32)
|
||||||
|
|
@ -290,20 +340,53 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
|
||||||
|
|
||||||
runtime·lock(&proflock);
|
runtime·lock(&proflock);
|
||||||
n = 0;
|
n = 0;
|
||||||
for(b=buckets; b; b=b->allnext)
|
for(b=mbuckets; b; b=b->allnext)
|
||||||
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
|
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
|
||||||
n++;
|
n++;
|
||||||
ok = false;
|
ok = false;
|
||||||
if(n <= p.len) {
|
if(n <= p.len) {
|
||||||
ok = true;
|
ok = true;
|
||||||
r = (Record*)p.array;
|
r = (Record*)p.array;
|
||||||
for(b=buckets; b; b=b->allnext)
|
for(b=mbuckets; b; b=b->allnext)
|
||||||
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
|
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
|
||||||
record(r++, b);
|
record(r++, b);
|
||||||
}
|
}
|
||||||
runtime·unlock(&proflock);
|
runtime·unlock(&proflock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Must match BlockProfileRecord in debug.go.
|
||||||
|
typedef struct BRecord BRecord;
|
||||||
|
struct BRecord {
|
||||||
|
int64 count;
|
||||||
|
int64 cycles;
|
||||||
|
uintptr stk[32];
|
||||||
|
};
|
||||||
|
|
||||||
|
func BlockProfile(p Slice) (n int, ok bool) {
|
||||||
|
Bucket *b;
|
||||||
|
BRecord *r;
|
||||||
|
int32 i;
|
||||||
|
|
||||||
|
runtime·lock(&proflock);
|
||||||
|
n = 0;
|
||||||
|
for(b=bbuckets; b; b=b->allnext)
|
||||||
|
n++;
|
||||||
|
ok = false;
|
||||||
|
if(n <= p.len) {
|
||||||
|
ok = true;
|
||||||
|
r = (BRecord*)p.array;
|
||||||
|
for(b=bbuckets; b; b=b->allnext, r++) {
|
||||||
|
r->count = b->count;
|
||||||
|
r->cycles = b->cycles;
|
||||||
|
for(i=0; i<b->nstk && i<nelem(r->stk); i++)
|
||||||
|
r->stk[i] = b->stk[i];
|
||||||
|
for(; i<nelem(r->stk); i++)
|
||||||
|
r->stk[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
runtime·unlock(&proflock);
|
||||||
|
}
|
||||||
|
|
||||||
// Must match StackRecord in debug.go.
|
// Must match StackRecord in debug.go.
|
||||||
typedef struct TRecord TRecord;
|
typedef struct TRecord TRecord;
|
||||||
struct TRecord {
|
struct TRecord {
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,7 @@ import (
|
||||||
// goroutine - stack traces of all current goroutines
|
// goroutine - stack traces of all current goroutines
|
||||||
// heap - a sampling of all heap allocations
|
// heap - a sampling of all heap allocations
|
||||||
// threadcreate - stack traces that led to the creation of new OS threads
|
// threadcreate - stack traces that led to the creation of new OS threads
|
||||||
|
// block - stack traces that led to blocking on synchronization primitives
|
||||||
//
|
//
|
||||||
// These predefine profiles maintain themselves and panic on an explicit
|
// These predefine profiles maintain themselves and panic on an explicit
|
||||||
// Add or Remove method call.
|
// Add or Remove method call.
|
||||||
|
|
@ -76,6 +77,12 @@ var heapProfile = &Profile{
|
||||||
write: writeHeap,
|
write: writeHeap,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var blockProfile = &Profile{
|
||||||
|
name: "block",
|
||||||
|
count: countBlock,
|
||||||
|
write: writeBlock,
|
||||||
|
}
|
||||||
|
|
||||||
func lockProfiles() {
|
func lockProfiles() {
|
||||||
profiles.mu.Lock()
|
profiles.mu.Lock()
|
||||||
if profiles.m == nil {
|
if profiles.m == nil {
|
||||||
|
|
@ -84,6 +91,7 @@ func lockProfiles() {
|
||||||
"goroutine": goroutineProfile,
|
"goroutine": goroutineProfile,
|
||||||
"threadcreate": threadcreateProfile,
|
"threadcreate": threadcreateProfile,
|
||||||
"heap": heapProfile,
|
"heap": heapProfile,
|
||||||
|
"block": blockProfile,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -600,3 +608,60 @@ func StopCPUProfile() {
|
||||||
runtime.SetCPUProfileRate(0)
|
runtime.SetCPUProfileRate(0)
|
||||||
<-cpu.done
|
<-cpu.done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type byCycles []runtime.BlockProfileRecord
|
||||||
|
|
||||||
|
func (x byCycles) Len() int { return len(x) }
|
||||||
|
func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||||
|
func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles }
|
||||||
|
|
||||||
|
// countBlock returns the number of records in the blocking profile.
|
||||||
|
func countBlock() int {
|
||||||
|
n, _ := runtime.BlockProfile(nil)
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeBlock writes the current blocking profile to w.
|
||||||
|
func writeBlock(w io.Writer, debug int) error {
|
||||||
|
var p []runtime.BlockProfileRecord
|
||||||
|
n, ok := runtime.BlockProfile(nil)
|
||||||
|
for {
|
||||||
|
p = make([]runtime.BlockProfileRecord, n+50)
|
||||||
|
n, ok = runtime.BlockProfile(p)
|
||||||
|
if ok {
|
||||||
|
p = p[:n]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Sort(byCycles(p))
|
||||||
|
|
||||||
|
b := bufio.NewWriter(w)
|
||||||
|
var tw *tabwriter.Writer
|
||||||
|
w = b
|
||||||
|
if debug > 0 {
|
||||||
|
tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
|
||||||
|
w = tw
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "--- contention:\n")
|
||||||
|
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
|
||||||
|
for i := range p {
|
||||||
|
r := &p[i]
|
||||||
|
fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
|
||||||
|
for _, pc := range r.Stack() {
|
||||||
|
fmt.Fprintf(w, " %#x", pc)
|
||||||
|
}
|
||||||
|
fmt.Fprint(w, "\n")
|
||||||
|
if debug > 0 {
|
||||||
|
printStackRecord(w, r.Stack(), false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if tw != nil {
|
||||||
|
tw.Flush()
|
||||||
|
}
|
||||||
|
return b.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
func runtime_cyclesPerSecond() int64
|
||||||
|
|
|
||||||
|
|
@ -358,3 +358,40 @@ runtime·fastrand1(void)
|
||||||
m->fastrand = x;
|
m->fastrand = x;
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Lock ticksLock;
|
||||||
|
static int64 ticks;
|
||||||
|
|
||||||
|
int64
|
||||||
|
runtime·tickspersecond(void)
|
||||||
|
{
|
||||||
|
int64 res, t0, t1, c0, c1;
|
||||||
|
|
||||||
|
res = (int64)runtime·atomicload64((uint64*)&ticks);
|
||||||
|
if(res != 0)
|
||||||
|
return ticks;
|
||||||
|
runtime·lock(&ticksLock);
|
||||||
|
res = ticks;
|
||||||
|
if(res == 0) {
|
||||||
|
t0 = runtime·nanotime();
|
||||||
|
c0 = runtime·cputicks();
|
||||||
|
runtime·usleep(100*1000);
|
||||||
|
t1 = runtime·nanotime();
|
||||||
|
c1 = runtime·cputicks();
|
||||||
|
if(t1 == t0)
|
||||||
|
t1++;
|
||||||
|
res = (c1-c0)*1000*1000*1000/(t1-t0);
|
||||||
|
if(res == 0)
|
||||||
|
res++;
|
||||||
|
runtime·atomicstore64((uint64*)&ticks, res);
|
||||||
|
}
|
||||||
|
runtime·unlock(&ticksLock);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
runtime∕pprof·runtime_cyclesPerSecond(int64 res)
|
||||||
|
{
|
||||||
|
res = runtime·tickspersecond();
|
||||||
|
FLUSH(&res);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -642,6 +642,9 @@ void runtime·resetcpuprofiler(int32);
|
||||||
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
||||||
void runtime·usleep(uint32);
|
void runtime·usleep(uint32);
|
||||||
int64 runtime·cputicks(void);
|
int64 runtime·cputicks(void);
|
||||||
|
int64 runtime·tickspersecond(void);
|
||||||
|
void runtime·blockevent(int64, int32);
|
||||||
|
extern int64 runtime·blockprofilerate;
|
||||||
|
|
||||||
#pragma varargck argpos runtime·printf 1
|
#pragma varargck argpos runtime·printf 1
|
||||||
#pragma varargck type "d" int32
|
#pragma varargck type "d" int32
|
||||||
|
|
|
||||||
|
|
@ -24,20 +24,21 @@ package sync
|
||||||
typedef struct Sema Sema;
|
typedef struct Sema Sema;
|
||||||
struct Sema
|
struct Sema
|
||||||
{
|
{
|
||||||
uint32 volatile *addr;
|
uint32 volatile* addr;
|
||||||
G *g;
|
G* g;
|
||||||
Sema *prev;
|
int64 releasetime;
|
||||||
Sema *next;
|
Sema* prev;
|
||||||
|
Sema* next;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct SemaRoot SemaRoot;
|
typedef struct SemaRoot SemaRoot;
|
||||||
struct SemaRoot
|
struct SemaRoot
|
||||||
{
|
{
|
||||||
Lock;
|
Lock;
|
||||||
Sema *head;
|
Sema* head;
|
||||||
Sema *tail;
|
Sema* tail;
|
||||||
// Number of waiters. Read w/o the lock.
|
// Number of waiters. Read w/o the lock.
|
||||||
uint32 volatile nwait;
|
uint32 volatile nwait;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Prime to not correlate with any user patterns.
|
// Prime to not correlate with any user patterns.
|
||||||
|
|
@ -97,12 +98,13 @@ cansemacquire(uint32 *addr)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
static void
|
||||||
runtime·semacquire(uint32 volatile *addr)
|
semacquireimpl(uint32 volatile *addr, int32 profile)
|
||||||
{
|
{
|
||||||
Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
|
Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
|
||||||
SemaRoot *root;
|
SemaRoot *root;
|
||||||
|
int64 t0;
|
||||||
|
|
||||||
// Easy case.
|
// Easy case.
|
||||||
if(cansemacquire(addr))
|
if(cansemacquire(addr))
|
||||||
return;
|
return;
|
||||||
|
|
@ -114,6 +116,12 @@ runtime·semacquire(uint32 volatile *addr)
|
||||||
// sleep
|
// sleep
|
||||||
// (waiter descriptor is dequeued by signaler)
|
// (waiter descriptor is dequeued by signaler)
|
||||||
root = semroot(addr);
|
root = semroot(addr);
|
||||||
|
t0 = 0;
|
||||||
|
s.releasetime = 0;
|
||||||
|
if(profile && runtime·blockprofilerate > 0) {
|
||||||
|
t0 = runtime·cputicks();
|
||||||
|
s.releasetime = -1;
|
||||||
|
}
|
||||||
for(;;) {
|
for(;;) {
|
||||||
runtime·lock(root);
|
runtime·lock(root);
|
||||||
// Add ourselves to nwait to disable "easy case" in semrelease.
|
// Add ourselves to nwait to disable "easy case" in semrelease.
|
||||||
|
|
@ -128,11 +136,20 @@ runtime·semacquire(uint32 volatile *addr)
|
||||||
// (we set nwait above), so go to sleep.
|
// (we set nwait above), so go to sleep.
|
||||||
semqueue(root, addr, &s);
|
semqueue(root, addr, &s);
|
||||||
runtime·park(runtime·unlock, root, "semacquire");
|
runtime·park(runtime·unlock, root, "semacquire");
|
||||||
if(cansemacquire(addr))
|
if(cansemacquire(addr)) {
|
||||||
|
if(t0)
|
||||||
|
runtime·blockevent(s.releasetime - t0, 3);
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
runtime·semacquire(uint32 volatile *addr)
|
||||||
|
{
|
||||||
|
semacquireimpl(addr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
runtime·semrelease(uint32 volatile *addr)
|
runtime·semrelease(uint32 volatile *addr)
|
||||||
{
|
{
|
||||||
|
|
@ -164,12 +181,15 @@ runtime·semrelease(uint32 volatile *addr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
runtime·unlock(root);
|
runtime·unlock(root);
|
||||||
if(s)
|
if(s) {
|
||||||
|
if(s->releasetime)
|
||||||
|
s->releasetime = runtime·cputicks();
|
||||||
runtime·ready(s->g);
|
runtime·ready(s->g);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func runtime_Semacquire(addr *uint32) {
|
func runtime_Semacquire(addr *uint32) {
|
||||||
runtime·semacquire(addr);
|
semacquireimpl(addr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
func runtime_Semrelease(addr *uint32) {
|
func runtime_Semrelease(addr *uint32) {
|
||||||
|
|
|
||||||
|
|
@ -206,14 +206,8 @@ runtime·setup_auxv(int32 argc, void *argv_list)
|
||||||
#pragma textflag 7
|
#pragma textflag 7
|
||||||
int64
|
int64
|
||||||
runtime·cputicks() {
|
runtime·cputicks() {
|
||||||
// copied from runtime.c:/^fastrand1
|
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
|
||||||
uint32 x;
|
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
|
||||||
|
// runtime·randomNumber provides better seeding of fastrand1.
|
||||||
x = runtime·randomNumber;
|
return runtime·nanotime() + runtime·randomNumber;
|
||||||
x += x;
|
|
||||||
if(x & 0x80000000L)
|
|
||||||
x ^= 0x88888eefUL;
|
|
||||||
runtime·randomNumber = x;
|
|
||||||
|
|
||||||
return ((int64)x) << 32 | x;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -102,14 +102,16 @@ var (
|
||||||
short = flag.Bool("test.short", false, "run smaller test suite to save time")
|
short = flag.Bool("test.short", false, "run smaller test suite to save time")
|
||||||
|
|
||||||
// Report as tests are run; default is silent for success.
|
// Report as tests are run; default is silent for success.
|
||||||
chatty = flag.Bool("test.v", false, "verbose: print additional output")
|
chatty = flag.Bool("test.v", false, "verbose: print additional output")
|
||||||
match = flag.String("test.run", "", "regular expression to select tests and examples to run")
|
match = flag.String("test.run", "", "regular expression to select tests and examples to run")
|
||||||
memProfile = flag.String("test.memprofile", "", "write a memory profile to the named file after execution")
|
memProfile = flag.String("test.memprofile", "", "write a memory profile to the named file after execution")
|
||||||
memProfileRate = flag.Int("test.memprofilerate", 0, "if >=0, sets runtime.MemProfileRate")
|
memProfileRate = flag.Int("test.memprofilerate", 0, "if >=0, sets runtime.MemProfileRate")
|
||||||
cpuProfile = flag.String("test.cpuprofile", "", "write a cpu profile to the named file during execution")
|
cpuProfile = flag.String("test.cpuprofile", "", "write a cpu profile to the named file during execution")
|
||||||
timeout = flag.Duration("test.timeout", 0, "if positive, sets an aggregate time limit for all tests")
|
blockProfile = flag.String("test.blockprofile", "", "write a goroutine blocking profile to the named file after execution")
|
||||||
cpuListStr = flag.String("test.cpu", "", "comma-separated list of number of CPUs to use for each test")
|
blockProfileRate = flag.Int("test.blockprofilerate", 1, "if >= 0, calls runtime.SetBlockProfileRate()")
|
||||||
parallel = flag.Int("test.parallel", runtime.GOMAXPROCS(0), "maximum test parallelism")
|
timeout = flag.Duration("test.timeout", 0, "if positive, sets an aggregate time limit for all tests")
|
||||||
|
cpuListStr = flag.String("test.cpu", "", "comma-separated list of number of CPUs to use for each test")
|
||||||
|
parallel = flag.Int("test.parallel", runtime.GOMAXPROCS(0), "maximum test parallelism")
|
||||||
|
|
||||||
haveExamples bool // are there examples?
|
haveExamples bool // are there examples?
|
||||||
|
|
||||||
|
|
@ -420,7 +422,9 @@ func before() {
|
||||||
}
|
}
|
||||||
// Could save f so after can call f.Close; not worth the effort.
|
// Could save f so after can call f.Close; not worth the effort.
|
||||||
}
|
}
|
||||||
|
if *blockProfile != "" && *blockProfileRate >= 0 {
|
||||||
|
runtime.SetBlockProfileRate(*blockProfileRate)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// after runs after all testing.
|
// after runs after all testing.
|
||||||
|
|
@ -439,6 +443,17 @@ func after() {
|
||||||
}
|
}
|
||||||
f.Close()
|
f.Close()
|
||||||
}
|
}
|
||||||
|
if *blockProfile != "" && *blockProfileRate >= 0 {
|
||||||
|
f, err := os.Create(*blockProfile)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "testing: %s", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err = pprof.Lookup("block").WriteTo(f, 0); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "testing: can't write %s: %s", *blockProfile, err)
|
||||||
|
}
|
||||||
|
f.Close()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var timer *time.Timer
|
var timer *time.Timer
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue