go/src/runtime/heapdump.go

729 lines
17 KiB
Go
Raw Normal View History

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Implementation of runtime/debug.WriteHeapDump. Writes all
// objects in the heap plus additional info (roots, threads,
// finalizers, etc.) to a file.
// The format of the dumped file is described at
// https://golang.org/s/go15heapdump.
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
func runtime_debug_WriteHeapDump(fd uintptr) {
stopTheWorld("write heap dump")
systemstack(func() {
writeheapdump_m(fd)
})
startTheWorld()
}
const (
fieldKindEol = 0
fieldKindPtr = 1
fieldKindIface = 2
fieldKindEface = 3
tagEOF = 0
tagObject = 1
tagOtherRoot = 2
tagType = 3
tagGoroutine = 4
tagStackFrame = 5
tagParams = 6
tagFinalizer = 7
tagItab = 8
tagOSThread = 9
tagMemStats = 10
tagQueuedFinalizer = 11
tagData = 12
tagBSS = 13
tagDefer = 14
tagPanic = 15
tagMemProf = 16
tagAllocSample = 17
)
var dumpfd uintptr // fd to write the dump to.
var tmpbuf []byte
// buffer of pending write data
const (
bufSize = 4096
)
var buf [bufSize]byte
var nbuf uintptr
func dwrite(data unsafe.Pointer, len uintptr) {
if len == 0 {
return
}
if nbuf+len <= bufSize {
copy(buf[nbuf:], (*[bufSize]byte)(data)[:len])
nbuf += len
return
}
write(dumpfd, unsafe.Pointer(&buf), int32(nbuf))
if len >= bufSize {
write(dumpfd, data, int32(len))
nbuf = 0
} else {
copy(buf[:], (*[bufSize]byte)(data)[:len])
nbuf = len
}
}
func dwritebyte(b byte) {
dwrite(unsafe.Pointer(&b), 1)
}
func flush() {
write(dumpfd, unsafe.Pointer(&buf), int32(nbuf))
nbuf = 0
}
// Cache of types that have been serialized already.
// We use a type's hash field to pick a bucket.
// Inside a bucket, we keep a list of types that
// have been serialized so far, most recently used first.
// Note: when a bucket overflows we may end up
// serializing a type more than once. That's ok.
const (
typeCacheBuckets = 256
typeCacheAssoc = 4
)
type typeCacheBucket struct {
t [typeCacheAssoc]*_type
}
var typecache [typeCacheBuckets]typeCacheBucket
// dump a uint64 in a varint format parseable by encoding/binary
func dumpint(v uint64) {
var buf [10]byte
var n int
for v >= 0x80 {
buf[n] = byte(v | 0x80)
n++
v >>= 7
}
buf[n] = byte(v)
n++
dwrite(unsafe.Pointer(&buf), uintptr(n))
}
func dumpbool(b bool) {
if b {
dumpint(1)
} else {
dumpint(0)
}
}
// dump varint uint64 length followed by memory contents
func dumpmemrange(data unsafe.Pointer, len uintptr) {
dumpint(uint64(len))
dwrite(data, len)
}
func dumpslice(b []byte) {
dumpint(uint64(len(b)))
if len(b) > 0 {
dwrite(unsafe.Pointer(&b[0]), uintptr(len(b)))
}
}
func dumpstr(s string) {
sp := stringStructOf(&s)
dumpmemrange(sp.str, uintptr(sp.len))
}
// dump information for a type
func dumptype(t *_type) {
if t == nil {
return
}
// If we've definitely serialized the type before,
// no need to do it again.
b := &typecache[t.hash&(typeCacheBuckets-1)]
if t == b.t[0] {
return
}
for i := 1; i < typeCacheAssoc; i++ {
if t == b.t[i] {
// Move-to-front
for j := i; j > 0; j-- {
b.t[j] = b.t[j-1]
}
b.t[0] = t
return
}
}
// Might not have been dumped yet. Dump it and
// remember we did so.
for j := typeCacheAssoc - 1; j > 0; j-- {
b.t[j] = b.t[j-1]
}
b.t[0] = t
// dump the type
dumpint(tagType)
dumpint(uint64(uintptr(unsafe.Pointer(t))))
dumpint(uint64(t.size))
if x := t.uncommon(); x == nil || t.nameOff(x.pkgpath).name() == "" {
dumpstr(t.string())
} else {
pkgpathstr := t.nameOff(x.pkgpath).name()
pkgpath := stringStructOf(&pkgpathstr)
namestr := t.name()
name := stringStructOf(&namestr)
dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
dwrite(pkgpath.str, uintptr(pkgpath.len))
dwritebyte('.')
dwrite(name.str, uintptr(name.len))
}
dumpbool(t.kind&kindDirectIface == 0 || t.ptrdata != 0)
}
// dump an object
func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
dumpint(tagObject)
dumpint(uint64(uintptr(obj)))
dumpmemrange(obj, size)
dumpfields(bv)
}
func dumpotherroot(description string, to unsafe.Pointer) {
dumpint(tagOtherRoot)
dumpstr(description)
dumpint(uint64(uintptr(to)))
}
func dumpfinalizer(obj unsafe.Pointer, fn *funcval, fint *_type, ot *ptrtype) {
dumpint(tagFinalizer)
dumpint(uint64(uintptr(obj)))
dumpint(uint64(uintptr(unsafe.Pointer(fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fint))))
dumpint(uint64(uintptr(unsafe.Pointer(ot))))
}
type childInfo struct {
// Information passed up from the callee frame about
// the layout of the outargs region.
argoff uintptr // where the arguments start in the frame
arglen uintptr // size of args region
args bitvector // if args.n >= 0, pointer map of args region
sp *uint8 // callee sp
depth uintptr // depth in call stack (0 == most recent)
}
// dump kinds & offsets of interesting fields in bv
func dumpbv(cbv *bitvector, offset uintptr) {
for i := uintptr(0); i < uintptr(cbv.n); i++ {
if cbv.ptrbit(i) == 1 {
dumpint(fieldKindPtr)
dumpint(uint64(offset + i*sys.PtrSize))
}
}
}
func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
child := (*childInfo)(arg)
f := s.fn
// Figure out what we can about our stack map
pc := s.pc
pcdata := int32(-1) // Use the entry map at function entry
if pc != f.entry {
pc--
pcdata = pcdatavalue(f, _PCDATA_StackMapIndex, pc, nil)
}
if pcdata == -1 {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
var bv bitvector
if stkmap != nil && stkmap.n > 0 {
bv = stackmapdata(stkmap, pcdata)
} else {
bv.n = -1
}
// Dump main body of stack frame.
dumpint(tagStackFrame)
dumpint(uint64(s.sp)) // lowest address in frame
dumpint(uint64(child.depth)) // # of frames deep on the stack
dumpint(uint64(uintptr(unsafe.Pointer(child.sp)))) // sp of child, or 0 if bottom of stack
dumpmemrange(unsafe.Pointer(s.sp), s.fp-s.sp) // frame contents
dumpint(uint64(f.entry))
dumpint(uint64(s.pc))
dumpint(uint64(s.continpc))
name := funcname(f)
if name == "" {
name = "unknown function"
}
dumpstr(name)
// Dump fields in the outargs section
if child.args.n >= 0 {
dumpbv(&child.args, child.argoff)
} else {
// conservative - everything might be a pointer
for off := child.argoff; off < child.argoff+child.arglen; off += sys.PtrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
}
// Dump fields in the local vars section
if stkmap == nil {
// No locals information, dump everything.
for off := child.arglen; off < s.varp-s.sp; off += sys.PtrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
} else if stkmap.n < 0 {
// Locals size information, dump just the locals.
size := uintptr(-stkmap.n)
for off := s.varp - size - s.sp; off < s.varp-s.sp; off += sys.PtrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
} else if stkmap.n > 0 {
// Locals bitmap information, scan just the pointers in
// locals.
dumpbv(&bv, s.varp-uintptr(bv.n)*sys.PtrSize-s.sp)
}
dumpint(fieldKindEol)
// Record arg info for parent.
child.argoff = s.argp - s.fp
child.arglen = s.arglen
child.sp = (*uint8)(unsafe.Pointer(s.sp))
child.depth++
stkmap = (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap != nil {
child.args = stackmapdata(stkmap, pcdata)
} else {
child.args.n = -1
}
return true
}
func dumpgoroutine(gp *g) {
var sp, pc, lr uintptr
if gp.syscallsp != 0 {
sp = gp.syscallsp
pc = gp.syscallpc
lr = 0
} else {
sp = gp.sched.sp
pc = gp.sched.pc
lr = gp.sched.lr
}
dumpint(tagGoroutine)
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(sp))
dumpint(uint64(gp.goid))
dumpint(uint64(gp.gopc))
dumpint(uint64(readgstatus(gp)))
dumpbool(isSystemGoroutine(gp, false))
dumpbool(false) // isbackground
dumpint(uint64(gp.waitsince))
dumpstr(gp.waitreason.String())
dumpint(uint64(uintptr(gp.sched.ctxt)))
dumpint(uint64(uintptr(unsafe.Pointer(gp.m))))
dumpint(uint64(uintptr(unsafe.Pointer(gp._defer))))
dumpint(uint64(uintptr(unsafe.Pointer(gp._panic))))
// dump stack
var child childInfo
child.args.n = -1
child.arglen = 0
child.sp = nil
child.depth = 0
gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, dumpframe, noescape(unsafe.Pointer(&child)), 0)
// dump defer & panic records
for d := gp._defer; d != nil; d = d.link {
dumpint(tagDefer)
dumpint(uint64(uintptr(unsafe.Pointer(d))))
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(d.sp))
dumpint(uint64(d.pc))
dumpint(uint64(uintptr(unsafe.Pointer(d.fn))))
if d.fn == nil {
// d.fn can be nil for open-coded defers
dumpint(uint64(0))
} else {
dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
}
dumpint(uint64(uintptr(unsafe.Pointer(d.link))))
}
for p := gp._panic; p != nil; p = p.link {
dumpint(tagPanic)
dumpint(uint64(uintptr(unsafe.Pointer(p))))
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
eface := efaceOf(&p.arg)
dumpint(uint64(uintptr(unsafe.Pointer(eface._type))))
dumpint(uint64(uintptr(unsafe.Pointer(eface.data))))
dumpint(0) // was p->defer, no longer recorded
dumpint(uint64(uintptr(unsafe.Pointer(p.link))))
}
}
func dumpgs() {
// goroutines & stacks
for i := 0; uintptr(i) < allglen; i++ {
gp := allgs[i]
status := readgstatus(gp) // The world is stopped so gp will not be in a scan state.
switch status {
default:
print("runtime: unexpected G.status ", hex(status), "\n")
throw("dumpgs in STW - bad status")
case _Gdead:
// ok
case _Grunnable,
_Gsyscall,
_Gwaiting:
dumpgoroutine(gp)
}
}
}
func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, ot *ptrtype) {
dumpint(tagQueuedFinalizer)
dumpint(uint64(uintptr(obj)))
dumpint(uint64(uintptr(unsafe.Pointer(fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fint))))
dumpint(uint64(uintptr(unsafe.Pointer(ot))))
}
func dumproots() {
// TODO(mwhudson): dump datamask etc from all objects
// data segment
dumpint(tagData)
dumpint(uint64(firstmoduledata.data))
dumpmemrange(unsafe.Pointer(firstmoduledata.data), firstmoduledata.edata-firstmoduledata.data)
dumpfields(firstmoduledata.gcdatamask)
// bss segment
dumpint(tagBSS)
dumpint(uint64(firstmoduledata.bss))
dumpmemrange(unsafe.Pointer(firstmoduledata.bss), firstmoduledata.ebss-firstmoduledata.bss)
dumpfields(firstmoduledata.gcbssmask)
// mspan.types
for _, s := range mheap_.allspans {
runtime: atomically set span state and use as publication barrier When everything is working correctly, any pointer the garbage collector encounters can only point into a fully initialized heap span, since the span must have been initialized before that pointer could escape the heap allocator and become visible to the GC. However, in various cases, we try to be defensive against bad pointers. In findObject, this is just a sanity check: we never expect to find a bad pointer, but programming errors can lead to them. In spanOfHeap, we don't necessarily trust the pointer and we're trying to check if it really does point to the heap, though it should always point to something. Conservative scanning takes this to a new level, since it can only guess that a word may be a pointer and verify this. In all of these cases, we have a problem that the span lookup and check can race with span initialization, since the span becomes visible to lookups before it's fully initialized. Furthermore, we're about to start initializing the span without the heap lock held, which is going to introduce races where accesses were previously protected by the heap lock. To address this, this CL makes accesses to mspan.state atomic, and ensures that the span is fully initialized before setting the state to mSpanInUse. All loads are now atomic, and in any case where we don't trust the pointer, it first atomically loads the span state and checks that it's mSpanInUse, after which it will have synchronized with span initialization and can safely check the other span fields. For #10958, #24543, but a good fix in general. Change-Id: I518b7c63555b02064b98aa5f802c92b758fef853 Reviewed-on: https://go-review.googlesource.com/c/go/+/203286 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
2019-10-23 11:25:38 -04:00
if s.state.get() == mSpanInUse {
// Finalizers
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
continue
}
spf := (*specialfinalizer)(unsafe.Pointer(sp))
p := unsafe.Pointer(s.base() + uintptr(spf.special.offset))
dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
}
}
}
// Finalizer queue
iterate_finq(finq_callback)
}
// Bit vector of free marks.
// Needs to be as big as the largest number of objects per span.
var freemark [_PageSize / 8]bool
func dumpobjs() {
for _, s := range mheap_.allspans {
runtime: atomically set span state and use as publication barrier When everything is working correctly, any pointer the garbage collector encounters can only point into a fully initialized heap span, since the span must have been initialized before that pointer could escape the heap allocator and become visible to the GC. However, in various cases, we try to be defensive against bad pointers. In findObject, this is just a sanity check: we never expect to find a bad pointer, but programming errors can lead to them. In spanOfHeap, we don't necessarily trust the pointer and we're trying to check if it really does point to the heap, though it should always point to something. Conservative scanning takes this to a new level, since it can only guess that a word may be a pointer and verify this. In all of these cases, we have a problem that the span lookup and check can race with span initialization, since the span becomes visible to lookups before it's fully initialized. Furthermore, we're about to start initializing the span without the heap lock held, which is going to introduce races where accesses were previously protected by the heap lock. To address this, this CL makes accesses to mspan.state atomic, and ensures that the span is fully initialized before setting the state to mSpanInUse. All loads are now atomic, and in any case where we don't trust the pointer, it first atomically loads the span state and checks that it's mSpanInUse, after which it will have synchronized with span initialization and can safely check the other span fields. For #10958, #24543, but a good fix in general. Change-Id: I518b7c63555b02064b98aa5f802c92b758fef853 Reviewed-on: https://go-review.googlesource.com/c/go/+/203286 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
2019-10-23 11:25:38 -04:00
if s.state.get() != mSpanInUse {
continue
}
p := s.base()
size := s.elemsize
n := (s.npages << _PageShift) / size
if n > uintptr(len(freemark)) {
throw("freemark array doesn't have enough entries")
}
for freeIndex := uintptr(0); freeIndex < s.nelems; freeIndex++ {
if s.isFree(freeIndex) {
freemark[freeIndex] = true
}
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
freemark[j] = false
continue
}
dumpobj(unsafe.Pointer(p), size, makeheapobjbv(p, size))
}
}
}
func dumpparams() {
dumpint(tagParams)
x := uintptr(1)
if *(*byte)(unsafe.Pointer(&x)) == 1 {
dumpbool(false) // little-endian ptrs
} else {
dumpbool(true) // big-endian ptrs
}
dumpint(sys.PtrSize)
var arenaStart, arenaEnd uintptr
runtime: support a two-level arena map Currently, the heap arena map is a single, large array that covers every possible arena frame in the entire address space. This is practical up to about 48 bits of address space with 64 MB arenas. However, there are two problems with this: 1. mips64, ppc64, and s390x support full 64-bit address spaces (though on Linux only s390x has kernel support for 64-bit address spaces). On these platforms, it would be good to support these larger address spaces. 2. On Windows, processes are charged for untouched memory, so for processes with small heaps, the mostly-untouched 32 MB arena map plus a 64 MB arena are significant overhead. Hence, it would be good to reduce both the arena map size and the arena size, but with a single-level arena, these are inversely proportional. This CL adds support for a two-level arena map. Arena frame numbers are now divided into arenaL1Bits of L1 index and arenaL2Bits of L2 index. At the moment, arenaL1Bits is always 0, so we effectively have a single level map. We do a few things so that this has no cost beyond the current single-level map: 1. We embed the L2 array directly in mheap, so if there's a single entry in the L2 array, the representation is identical to the current representation and there's no extra level of indirection. 2. Hot code that accesses the arena map is structured so that it optimizes to nearly the same machine code as it does currently. 3. We make some small tweaks to hot code paths and to the inliner itself to keep some important functions inlined despite their now-larger ASTs. In particular, this is necessary for heapBitsForAddr and heapBits.next. Possibly as a result of some of the tweaks, this actually slightly improves the performance of the x/benchmarks garbage benchmark: name old time/op new time/op delta Garbage/benchmem-MB=64-12 2.28ms ± 1% 2.26ms ± 1% -1.07% (p=0.000 n=17+19) (https://perf.golang.org/search?q=upload:20180223.2) For #23900. Change-Id: If5164e0961754f97eb9eca58f837f36d759505ff Reviewed-on: https://go-review.googlesource.com/96779 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rick Hudson <rlh@golang.org>
2018-02-22 20:38:09 -05:00
for i1 := range mheap_.arenas {
if mheap_.arenas[i1] == nil {
continue
}
for i, ha := range mheap_.arenas[i1] {
if ha == nil {
continue
}
base := arenaBase(arenaIdx(i1)<<arenaL1Shift | arenaIdx(i))
if arenaStart == 0 || base < arenaStart {
arenaStart = base
}
if base+heapArenaBytes > arenaEnd {
arenaEnd = base + heapArenaBytes
}
}
}
dumpint(uint64(arenaStart))
dumpint(uint64(arenaEnd))
dumpstr(sys.GOARCH)
dumpstr(sys.Goexperiment)
dumpint(uint64(ncpu))
}
func itab_callback(tab *itab) {
t := tab._type
dumptype(t)
dumpint(tagItab)
dumpint(uint64(uintptr(unsafe.Pointer(tab))))
dumpint(uint64(uintptr(unsafe.Pointer(t))))
}
func dumpitabs() {
iterate_itabs(itab_callback)
}
func dumpms() {
for mp := allm; mp != nil; mp = mp.alllink {
dumpint(tagOSThread)
dumpint(uint64(uintptr(unsafe.Pointer(mp))))
dumpint(uint64(mp.id))
dumpint(mp.procid)
}
}
func dumpmemstats() {
// These ints should be identical to the exported
// MemStats structure and should be ordered the same
// way too.
dumpint(tagMemStats)
dumpint(memstats.alloc)
dumpint(memstats.total_alloc)
dumpint(memstats.sys)
dumpint(memstats.nlookup)
dumpint(memstats.nmalloc)
dumpint(memstats.nfree)
dumpint(memstats.alloc)
runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2020-07-29 20:25:05 +00:00
dumpint(memstats.heap_sys.load())
dumpint(memstats.heap_sys.load() - memstats.heap_inuse)
dumpint(memstats.heap_inuse)
dumpint(memstats.heap_released)
dumpint(memstats.heap_objects)
dumpint(memstats.stacks_inuse)
runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2020-07-29 20:25:05 +00:00
dumpint(memstats.stacks_sys.load())
dumpint(memstats.mspan_inuse)
runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2020-07-29 20:25:05 +00:00
dumpint(memstats.mspan_sys.load())
dumpint(memstats.mcache_inuse)
runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2020-07-29 20:25:05 +00:00
dumpint(memstats.mcache_sys.load())
dumpint(memstats.buckhash_sys.load())
dumpint(memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse)
runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2020-07-29 20:25:05 +00:00
dumpint(memstats.other_sys.load())
dumpint(memstats.next_gc)
dumpint(memstats.last_gc_unix)
dumpint(memstats.pause_total_ns)
for i := 0; i < 256; i++ {
dumpint(memstats.pause_ns[i])
}
dumpint(uint64(memstats.numgc))
}
func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
dumpint(tagMemProf)
dumpint(uint64(uintptr(unsafe.Pointer(b))))
dumpint(uint64(size))
dumpint(uint64(nstk))
for i := uintptr(0); i < nstk; i++ {
pc := stk[i]
f := findfunc(pc)
if !f.valid() {
var buf [64]byte
n := len(buf)
n--
buf[n] = ')'
if pc == 0 {
n--
buf[n] = '0'
} else {
for pc > 0 {
n--
buf[n] = "0123456789abcdef"[pc&15]
pc >>= 4
}
}
n--
buf[n] = 'x'
n--
buf[n] = '0'
n--
buf[n] = '('
dumpslice(buf[n:])
dumpstr("?")
dumpint(0)
} else {
dumpstr(funcname(f))
if i > 0 && pc > f.entry {
pc--
}
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack Scalararg and ptrarg are not "signal safe". Go code filling them out can be interrupted by a signal, and then the signal handler runs, and if it also ends up in Go code that uses scalararg or ptrarg, now the old values have been smashed. For the pieces of code that do need to run in a signal handler, we introduced onM_signalok, which is really just onM except that the _signalok is meant to convey that the caller asserts that scalarg and ptrarg will be restored to their old values after the call (instead of the usual behavior, zeroing them). Scalararg and ptrarg are also untyped and therefore error-prone. Go code can always pass a closure instead of using scalararg and ptrarg; they were only really necessary for C code. And there's no more C code. For all these reasons, delete scalararg and ptrarg, converting the few remaining references to use closures. Once those are gone, there is no need for a distinction between onM and onM_signalok, so replace both with a single function equivalent to the current onM_signalok (that is, it can be called on any of the curg, g0, and gsignal stacks). The name onM and the phrase 'm stack' are misnomers, because on most system an M has two system stacks: the main thread stack and the signal handling stack. Correct the misnomer by naming the replacement function systemstack. Fix a few references to "M stack" in code. The main motivation for this change is to eliminate scalararg/ptrarg. Rick and I have already seen them cause problems because the calling sequence m.ptrarg[0] = p is a heap pointer assignment, so it gets a write barrier. The write barrier also uses onM, so it has all the same problems as if it were being invoked by a signal handler. We worked around this by saving and restoring the old values and by calling onM_signalok, but there's no point in keeping this nice home for bugs around any longer. This CL also changes funcline to return the file name as a result instead of filling in a passed-in *string. (The *string signature is left over from when the code was written in and called from C.) That's arguably an unrelated change, except that once I had done the ptrarg/scalararg/onM cleanup I started getting false positives about the *string argument escaping (not allowed in package runtime). The compiler is wrong, but the easiest fix is to write the code like Go code instead of like C code. I am a bit worried that the compiler is wrong because of some use of uninitialized memory in the escape analysis. If that's the reason, it will go away when we convert the compiler to Go. (And if not, we'll debug it the next time.) LGTM=khr R=r, khr CC=austin, golang-codereviews, iant, rlh https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
file, line := funcline(f, pc)
dumpstr(file)
dumpint(uint64(line))
}
}
dumpint(uint64(allocs))
dumpint(uint64(frees))
}
func dumpmemprof() {
iterate_memprof(dumpmemprof_callback)
for _, s := range mheap_.allspans {
runtime: atomically set span state and use as publication barrier When everything is working correctly, any pointer the garbage collector encounters can only point into a fully initialized heap span, since the span must have been initialized before that pointer could escape the heap allocator and become visible to the GC. However, in various cases, we try to be defensive against bad pointers. In findObject, this is just a sanity check: we never expect to find a bad pointer, but programming errors can lead to them. In spanOfHeap, we don't necessarily trust the pointer and we're trying to check if it really does point to the heap, though it should always point to something. Conservative scanning takes this to a new level, since it can only guess that a word may be a pointer and verify this. In all of these cases, we have a problem that the span lookup and check can race with span initialization, since the span becomes visible to lookups before it's fully initialized. Furthermore, we're about to start initializing the span without the heap lock held, which is going to introduce races where accesses were previously protected by the heap lock. To address this, this CL makes accesses to mspan.state atomic, and ensures that the span is fully initialized before setting the state to mSpanInUse. All loads are now atomic, and in any case where we don't trust the pointer, it first atomically loads the span state and checks that it's mSpanInUse, after which it will have synchronized with span initialization and can safely check the other span fields. For #10958, #24543, but a good fix in general. Change-Id: I518b7c63555b02064b98aa5f802c92b758fef853 Reviewed-on: https://go-review.googlesource.com/c/go/+/203286 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
2019-10-23 11:25:38 -04:00
if s.state.get() != mSpanInUse {
continue
}
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialProfile {
continue
}
spp := (*specialprofile)(unsafe.Pointer(sp))
p := s.base() + uintptr(spp.special.offset)
dumpint(tagAllocSample)
dumpint(uint64(p))
dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
}
}
}
var dumphdr = []byte("go1.7 heap dump\n")
func mdump() {
// make sure we're done sweeping
for _, s := range mheap_.allspans {
runtime: atomically set span state and use as publication barrier When everything is working correctly, any pointer the garbage collector encounters can only point into a fully initialized heap span, since the span must have been initialized before that pointer could escape the heap allocator and become visible to the GC. However, in various cases, we try to be defensive against bad pointers. In findObject, this is just a sanity check: we never expect to find a bad pointer, but programming errors can lead to them. In spanOfHeap, we don't necessarily trust the pointer and we're trying to check if it really does point to the heap, though it should always point to something. Conservative scanning takes this to a new level, since it can only guess that a word may be a pointer and verify this. In all of these cases, we have a problem that the span lookup and check can race with span initialization, since the span becomes visible to lookups before it's fully initialized. Furthermore, we're about to start initializing the span without the heap lock held, which is going to introduce races where accesses were previously protected by the heap lock. To address this, this CL makes accesses to mspan.state atomic, and ensures that the span is fully initialized before setting the state to mSpanInUse. All loads are now atomic, and in any case where we don't trust the pointer, it first atomically loads the span state and checks that it's mSpanInUse, after which it will have synchronized with span initialization and can safely check the other span fields. For #10958, #24543, but a good fix in general. Change-Id: I518b7c63555b02064b98aa5f802c92b758fef853 Reviewed-on: https://go-review.googlesource.com/c/go/+/203286 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
2019-10-23 11:25:38 -04:00
if s.state.get() == mSpanInUse {
s.ensureSwept()
}
}
memclrNoHeapPointers(unsafe.Pointer(&typecache), unsafe.Sizeof(typecache))
dwrite(unsafe.Pointer(&dumphdr[0]), uintptr(len(dumphdr)))
dumpparams()
dumpitabs()
dumpobjs()
dumpgs()
dumpms()
dumproots()
dumpmemstats()
dumpmemprof()
dumpint(tagEOF)
flush()
}
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack Scalararg and ptrarg are not "signal safe". Go code filling them out can be interrupted by a signal, and then the signal handler runs, and if it also ends up in Go code that uses scalararg or ptrarg, now the old values have been smashed. For the pieces of code that do need to run in a signal handler, we introduced onM_signalok, which is really just onM except that the _signalok is meant to convey that the caller asserts that scalarg and ptrarg will be restored to their old values after the call (instead of the usual behavior, zeroing them). Scalararg and ptrarg are also untyped and therefore error-prone. Go code can always pass a closure instead of using scalararg and ptrarg; they were only really necessary for C code. And there's no more C code. For all these reasons, delete scalararg and ptrarg, converting the few remaining references to use closures. Once those are gone, there is no need for a distinction between onM and onM_signalok, so replace both with a single function equivalent to the current onM_signalok (that is, it can be called on any of the curg, g0, and gsignal stacks). The name onM and the phrase 'm stack' are misnomers, because on most system an M has two system stacks: the main thread stack and the signal handling stack. Correct the misnomer by naming the replacement function systemstack. Fix a few references to "M stack" in code. The main motivation for this change is to eliminate scalararg/ptrarg. Rick and I have already seen them cause problems because the calling sequence m.ptrarg[0] = p is a heap pointer assignment, so it gets a write barrier. The write barrier also uses onM, so it has all the same problems as if it were being invoked by a signal handler. We worked around this by saving and restoring the old values and by calling onM_signalok, but there's no point in keeping this nice home for bugs around any longer. This CL also changes funcline to return the file name as a result instead of filling in a passed-in *string. (The *string signature is left over from when the code was written in and called from C.) That's arguably an unrelated change, except that once I had done the ptrarg/scalararg/onM cleanup I started getting false positives about the *string argument escaping (not allowed in package runtime). The compiler is wrong, but the easiest fix is to write the code like Go code instead of like C code. I am a bit worried that the compiler is wrong because of some use of uninitialized memory in the escape analysis. If that's the reason, it will go away when we convert the compiler to Go. (And if not, we'll debug it the next time.) LGTM=khr R=r, khr CC=austin, golang-codereviews, iant, rlh https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
func writeheapdump_m(fd uintptr) {
_g_ := getg()
casgstatus(_g_.m.curg, _Grunning, _Gwaiting)
_g_.waitreason = waitReasonDumpingHeap
// Update stats so we can dump them.
// As a side effect, flushes all the mcaches so the mspan.freelist
// lists contain all the free objects.
updatememstats()
// Set dump file.
dumpfd = fd
// Call dump routine.
mdump()
// Reset dump file.
dumpfd = 0
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
tmpbuf = nil
}
casgstatus(_g_.m.curg, _Gwaiting, _Grunning)
}
// dumpint() the kind & offset of each field in an object.
func dumpfields(bv bitvector) {
dumpbv(&bv, 0)
dumpint(fieldKindEol)
}
func makeheapobjbv(p uintptr, size uintptr) bitvector {
// Extend the temp buffer if necessary.
nptr := size / sys.PtrSize
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
if uintptr(len(tmpbuf)) < nptr/8+1 {
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
n := nptr/8 + 1
p := sysAlloc(n, &memstats.other_sys)
if p == nil {
throw("heapdump: out of memory")
}
tmpbuf = (*[1 << 30]byte)(p)[:n]
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// Convert heap bitmap to pointer bitmap.
for i := uintptr(0); i < nptr/8+1; i++ {
tmpbuf[i] = 0
}
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
runtime: move checkmarks to a separate bitmap Currently, the GC stores the object marks for checkmarks mode in the heap bitmap using a rather complex encoding: for one word objects, the checkmark is stored in the pointer/scalar bit since one word objects must be pointers; for larger objects, the checkmark is stored in what would be the scan/dead bit for the second word of the object. This encoding made more sense when the runtime used the first scan/dead bit as the regular mark bit, but we moved away from that long ago. This encoding and overloading of the heap bitmap bits causes a great deal of complexity in many parts of the allocator and garbage collector and leads to some subtle bugs like #15903. This CL moves the checkmarks mark bits into their own per-arena bitmap and reclaims the second scan/dead bit as a regular scan/dead bit. I tested this by enabling doubleCheck mode in heapBitsSetType and running in both regular and GODEBUG=gccheckmark=1 mode. Fixes #15903. No performance degradation. (Very slight improvement on a few benchmarks, but it's probably just noise.) name old time/op new time/op delta BiogoIgor 16.6s ± 1% 16.4s ± 1% -0.94% (p=0.000 n=25+24) BiogoKrishna 19.2s ± 3% 19.2s ± 3% ~ (p=0.638 n=23+25) BleveIndexBatch100 6.12s ± 5% 6.17s ± 4% ~ (p=0.170 n=25+25) CompileTemplate 206ms ± 1% 205ms ± 1% -0.43% (p=0.005 n=24+24) CompileUnicode 82.2ms ± 2% 81.5ms ± 2% -0.95% (p=0.001 n=22+22) CompileGoTypes 755ms ± 3% 754ms ± 4% ~ (p=0.715 n=25+25) CompileCompiler 3.73s ± 1% 3.73s ± 1% ~ (p=0.445 n=25+24) CompileSSA 8.67s ± 1% 8.66s ± 1% ~ (p=0.836 n=24+22) CompileFlate 134ms ± 2% 133ms ± 1% -0.66% (p=0.001 n=24+23) CompileGoParser 164ms ± 1% 163ms ± 1% -0.85% (p=0.000 n=24+24) CompileReflect 466ms ± 5% 466ms ± 3% ~ (p=0.863 n=25+25) CompileTar 182ms ± 1% 182ms ± 1% -0.31% (p=0.048 n=24+24) CompileXML 249ms ± 1% 248ms ± 1% -0.32% (p=0.031 n=21+25) CompileStdCmd 10.3s ± 1% 10.3s ± 1% ~ (p=0.459 n=23+23) FoglemanFauxGLRenderRotateBoat 8.66s ± 1% 8.62s ± 1% -0.47% (p=0.000 n=23+24) FoglemanPathTraceRenderGopherIter1 20.3s ± 3% 20.2s ± 2% ~ (p=0.893 n=25+25) GopherLuaKNucleotide 29.7s ± 1% 29.8s ± 2% ~ (p=0.421 n=24+25) MarkdownRenderXHTML 246ms ± 1% 247ms ± 1% ~ (p=0.558 n=25+24) Tile38WithinCircle100kmRequest 779µs ± 4% 779µs ± 3% ~ (p=0.954 n=25+25) Tile38IntersectsCircle100kmRequest 1.02ms ± 3% 1.01ms ± 4% ~ (p=0.658 n=25+25) Tile38KNearestLimit100Request 984µs ± 4% 986µs ± 4% ~ (p=0.627 n=24+25) [Geo mean] 552ms 551ms -0.19% https://perf.golang.org/search?q=upload:20200723.6 Change-Id: Ic703f26a83fb034941dc6f4788fc997d56890dec Reviewed-on: https://go-review.googlesource.com/c/go/+/244539 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Martin Möhrmann <moehrmann@google.com>
2020-06-05 16:48:03 -04:00
if !hbits.morePointers() {
break // end of object
}
if hbits.isPointer() {
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
tmpbuf[i/8] |= 1 << (i % 8)
}
hbits = hbits.next()
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
return bitvector{int32(i), &tmpbuf[0]}
}