go/src/runtime/heapdump.go

721 lines
17 KiB
Go
Raw Normal View History

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Implementation of runtime/debug.WriteHeapDump. Writes all
// objects in the heap plus additional info (roots, threads,
// finalizers, etc.) to a file.
// The format of the dumped file is described at
// https://golang.org/s/go15heapdump.
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
func runtime_debug_WriteHeapDump(fd uintptr) {
stopTheWorld("write heap dump")
systemstack(func() {
writeheapdump_m(fd)
})
startTheWorld()
}
const (
fieldKindEol = 0
fieldKindPtr = 1
fieldKindIface = 2
fieldKindEface = 3
tagEOF = 0
tagObject = 1
tagOtherRoot = 2
tagType = 3
tagGoroutine = 4
tagStackFrame = 5
tagParams = 6
tagFinalizer = 7
tagItab = 8
tagOSThread = 9
tagMemStats = 10
tagQueuedFinalizer = 11
tagData = 12
tagBSS = 13
tagDefer = 14
tagPanic = 15
tagMemProf = 16
tagAllocSample = 17
)
var dumpfd uintptr // fd to write the dump to.
var tmpbuf []byte
// buffer of pending write data
const (
bufSize = 4096
)
var buf [bufSize]byte
var nbuf uintptr
func dwrite(data unsafe.Pointer, len uintptr) {
if len == 0 {
return
}
if nbuf+len <= bufSize {
copy(buf[nbuf:], (*[bufSize]byte)(data)[:len])
nbuf += len
return
}
write(dumpfd, unsafe.Pointer(&buf), int32(nbuf))
if len >= bufSize {
write(dumpfd, data, int32(len))
nbuf = 0
} else {
copy(buf[:], (*[bufSize]byte)(data)[:len])
nbuf = len
}
}
func dwritebyte(b byte) {
dwrite(unsafe.Pointer(&b), 1)
}
func flush() {
write(dumpfd, unsafe.Pointer(&buf), int32(nbuf))
nbuf = 0
}
// Cache of types that have been serialized already.
// We use a type's hash field to pick a bucket.
// Inside a bucket, we keep a list of types that
// have been serialized so far, most recently used first.
// Note: when a bucket overflows we may end up
// serializing a type more than once. That's ok.
const (
typeCacheBuckets = 256
typeCacheAssoc = 4
)
type typeCacheBucket struct {
t [typeCacheAssoc]*_type
}
var typecache [typeCacheBuckets]typeCacheBucket
// dump a uint64 in a varint format parseable by encoding/binary
func dumpint(v uint64) {
var buf [10]byte
var n int
for v >= 0x80 {
buf[n] = byte(v | 0x80)
n++
v >>= 7
}
buf[n] = byte(v)
n++
dwrite(unsafe.Pointer(&buf), uintptr(n))
}
func dumpbool(b bool) {
if b {
dumpint(1)
} else {
dumpint(0)
}
}
// dump varint uint64 length followed by memory contents
func dumpmemrange(data unsafe.Pointer, len uintptr) {
dumpint(uint64(len))
dwrite(data, len)
}
func dumpslice(b []byte) {
dumpint(uint64(len(b)))
if len(b) > 0 {
dwrite(unsafe.Pointer(&b[0]), uintptr(len(b)))
}
}
func dumpstr(s string) {
sp := stringStructOf(&s)
dumpmemrange(sp.str, uintptr(sp.len))
}
// dump information for a type
func dumptype(t *_type) {
if t == nil {
return
}
// If we've definitely serialized the type before,
// no need to do it again.
b := &typecache[t.hash&(typeCacheBuckets-1)]
if t == b.t[0] {
return
}
for i := 1; i < typeCacheAssoc; i++ {
if t == b.t[i] {
// Move-to-front
for j := i; j > 0; j-- {
b.t[j] = b.t[j-1]
}
b.t[0] = t
return
}
}
// Might not have been dumped yet. Dump it and
// remember we did so.
for j := typeCacheAssoc - 1; j > 0; j-- {
b.t[j] = b.t[j-1]
}
b.t[0] = t
// dump the type
dumpint(tagType)
dumpint(uint64(uintptr(unsafe.Pointer(t))))
dumpint(uint64(t.size))
if x := t.uncommon(); x == nil || t.nameOff(x.pkgpath).name() == "" {
dumpstr(t.string())
} else {
pkgpathstr := t.nameOff(x.pkgpath).name()
pkgpath := stringStructOf(&pkgpathstr)
namestr := t.name()
name := stringStructOf(&namestr)
dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
dwrite(pkgpath.str, uintptr(pkgpath.len))
dwritebyte('.')
dwrite(name.str, uintptr(name.len))
}
dumpbool(t.kind&kindDirectIface == 0 || t.kind&kindNoPointers == 0)
}
// dump an object
func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
dumpbvtypes(&bv, obj)
dumpint(tagObject)
dumpint(uint64(uintptr(obj)))
dumpmemrange(obj, size)
dumpfields(bv)
}
func dumpotherroot(description string, to unsafe.Pointer) {
dumpint(tagOtherRoot)
dumpstr(description)
dumpint(uint64(uintptr(to)))
}
func dumpfinalizer(obj unsafe.Pointer, fn *funcval, fint *_type, ot *ptrtype) {
dumpint(tagFinalizer)
dumpint(uint64(uintptr(obj)))
dumpint(uint64(uintptr(unsafe.Pointer(fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fint))))
dumpint(uint64(uintptr(unsafe.Pointer(ot))))
}
type childInfo struct {
// Information passed up from the callee frame about
// the layout of the outargs region.
argoff uintptr // where the arguments start in the frame
arglen uintptr // size of args region
args bitvector // if args.n >= 0, pointer map of args region
sp *uint8 // callee sp
depth uintptr // depth in call stack (0 == most recent)
}
// dump kinds & offsets of interesting fields in bv
func dumpbv(cbv *bitvector, offset uintptr) {
bv := gobv(*cbv)
for i := uintptr(0); i < bv.n; i++ {
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
if bv.bytedata[i/8]>>(i%8)&1 == 1 {
dumpint(fieldKindPtr)
dumpint(uint64(offset + i*sys.PtrSize))
}
}
}
func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
child := (*childInfo)(arg)
f := s.fn
// Figure out what we can about our stack map
pc := s.pc
if pc != f.entry {
pc--
}
runtime: add pcvalue cache to improve stack scan speed The cost of scanning large stacks is currently dominated by the time spent looking up and decoding the pcvalue table. However, large stacks are usually large not because they contain calls to many different functions, but because they contain many calls to the same, small set of recursive functions. Hence, walking large stacks tends to make the same pcvalue queries many times. Based on this observation, this commit adds a small, very simple, and fast cache in front of pcvalue lookup. We thread this cache down from operations that make many pcvalue calls, such as gentraceback, stack scanning, and stack adjusting. This simple cache works well because it has minimal overhead when it's not effective. I also tried a hashed direct-map cache, CLOCK-based replacement, round-robin replacement, and round-robin with lookups disabled until there had been at least 16 probes, but none of these approaches had obvious wins over the random replacement policy in this commit. This nearly doubles the overall performance of the deep stack test program from issue #10898: name old time/op new time/op delta Issue10898 16.5s ±12% 9.2s ±12% -44.37% (p=0.008 n=5+5) It's a very slight win on the garbage benchmark: name old time/op new time/op delta XBenchGarbage-12 4.92ms ± 1% 4.89ms ± 1% -0.75% (p=0.000 n=18+19) It's a wash (but doesn't harm performance) on the go1 benchmarks, which don't have particularly deep stacks: name old time/op new time/op delta BinaryTree17-12 3.11s ± 2% 3.20s ± 3% +2.83% (p=0.000 n=17+20) Fannkuch11-12 2.51s ± 1% 2.51s ± 1% -0.22% (p=0.034 n=19+18) FmtFprintfEmpty-12 50.8ns ± 3% 50.6ns ± 2% ~ (p=0.793 n=20+20) FmtFprintfString-12 174ns ± 0% 174ns ± 1% +0.17% (p=0.048 n=15+20) FmtFprintfInt-12 177ns ± 0% 165ns ± 1% -6.99% (p=0.000 n=17+19) FmtFprintfIntInt-12 283ns ± 1% 284ns ± 0% +0.22% (p=0.000 n=18+15) FmtFprintfPrefixedInt-12 243ns ± 1% 244ns ± 1% +0.40% (p=0.000 n=20+19) FmtFprintfFloat-12 318ns ± 0% 319ns ± 0% +0.27% (p=0.001 n=19+20) FmtManyArgs-12 1.12µs ± 0% 1.14µs ± 0% +1.74% (p=0.000 n=19+20) GobDecode-12 8.69ms ± 0% 8.73ms ± 1% +0.46% (p=0.000 n=18+18) GobEncode-12 6.64ms ± 1% 6.61ms ± 1% -0.46% (p=0.000 n=20+20) Gzip-12 323ms ± 2% 319ms ± 1% -1.11% (p=0.000 n=20+20) Gunzip-12 42.8ms ± 0% 42.9ms ± 0% ~ (p=0.158 n=18+20) HTTPClientServer-12 63.3µs ± 1% 63.1µs ± 1% -0.35% (p=0.011 n=20+20) JSONEncode-12 16.9ms ± 1% 17.3ms ± 1% +2.84% (p=0.000 n=19+20) JSONDecode-12 59.7ms ± 0% 58.5ms ± 0% -2.05% (p=0.000 n=19+17) Mandelbrot200-12 3.92ms ± 0% 3.91ms ± 0% -0.16% (p=0.003 n=19+19) GoParse-12 3.79ms ± 2% 3.75ms ± 2% -0.91% (p=0.005 n=20+20) RegexpMatchEasy0_32-12 102ns ± 1% 101ns ± 1% -0.80% (p=0.001 n=14+20) RegexpMatchEasy0_1K-12 337ns ± 1% 346ns ± 1% +2.90% (p=0.000 n=20+19) RegexpMatchEasy1_32-12 84.4ns ± 2% 84.3ns ± 2% ~ (p=0.743 n=20+20) RegexpMatchEasy1_1K-12 502ns ± 1% 505ns ± 0% +0.64% (p=0.000 n=20+20) RegexpMatchMedium_32-12 133ns ± 1% 132ns ± 1% -0.85% (p=0.000 n=20+19) RegexpMatchMedium_1K-12 40.1µs ± 1% 39.8µs ± 1% -0.77% (p=0.000 n=18+18) RegexpMatchHard_32-12 2.08µs ± 1% 2.07µs ± 1% -0.55% (p=0.001 n=18+19) RegexpMatchHard_1K-12 62.4µs ± 1% 62.0µs ± 1% -0.74% (p=0.000 n=19+19) Revcomp-12 545ms ± 2% 545ms ± 3% ~ (p=0.771 n=19+20) Template-12 73.7ms ± 1% 72.0ms ± 0% -2.33% (p=0.000 n=20+18) TimeParse-12 358ns ± 1% 351ns ± 1% -2.07% (p=0.000 n=20+20) TimeFormat-12 369ns ± 1% 356ns ± 0% -3.53% (p=0.000 n=20+18) [Geo mean] 63.5µs 63.2µs -0.41% name old speed new speed delta GobDecode-12 88.3MB/s ± 0% 87.9MB/s ± 0% -0.43% (p=0.000 n=18+17) GobEncode-12 116MB/s ± 1% 116MB/s ± 1% +0.47% (p=0.000 n=20+20) Gzip-12 60.2MB/s ± 2% 60.8MB/s ± 1% +1.13% (p=0.000 n=20+20) Gunzip-12 453MB/s ± 0% 453MB/s ± 0% ~ (p=0.160 n=18+20) JSONEncode-12 115MB/s ± 1% 112MB/s ± 1% -2.76% (p=0.000 n=19+20) JSONDecode-12 32.5MB/s ± 0% 33.2MB/s ± 0% +2.09% (p=0.000 n=19+17) GoParse-12 15.3MB/s ± 2% 15.4MB/s ± 2% +0.92% (p=0.004 n=20+20) RegexpMatchEasy0_32-12 311MB/s ± 1% 314MB/s ± 1% +0.78% (p=0.000 n=15+19) RegexpMatchEasy0_1K-12 3.04GB/s ± 1% 2.95GB/s ± 1% -2.90% (p=0.000 n=19+19) RegexpMatchEasy1_32-12 379MB/s ± 2% 380MB/s ± 2% ~ (p=0.779 n=20+20) RegexpMatchEasy1_1K-12 2.04GB/s ± 1% 2.02GB/s ± 0% -0.62% (p=0.000 n=20+20) RegexpMatchMedium_32-12 7.46MB/s ± 1% 7.53MB/s ± 1% +0.86% (p=0.000 n=20+19) RegexpMatchMedium_1K-12 25.5MB/s ± 1% 25.7MB/s ± 1% +0.78% (p=0.000 n=18+18) RegexpMatchHard_32-12 15.4MB/s ± 1% 15.5MB/s ± 1% +0.62% (p=0.000 n=19+19) RegexpMatchHard_1K-12 16.4MB/s ± 1% 16.5MB/s ± 1% +0.82% (p=0.000 n=20+19) Revcomp-12 466MB/s ± 2% 466MB/s ± 3% ~ (p=0.765 n=19+20) Template-12 26.3MB/s ± 1% 27.0MB/s ± 0% +2.38% (p=0.000 n=20+18) [Geo mean] 97.8MB/s 98.0MB/s +0.23% Change-Id: I281044ae0b24990ba46487cacbc1069493274bc4 Reviewed-on: https://go-review.googlesource.com/13614 Reviewed-by: Keith Randall <khr@golang.org>
2015-08-12 23:43:43 -04:00
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, pc, nil)
if pcdata == -1 {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
// Dump any types we will need to resolve Efaces.
if child.args.n >= 0 {
dumpbvtypes(&child.args, unsafe.Pointer(s.sp+child.argoff))
}
var bv bitvector
if stkmap != nil && stkmap.n > 0 {
bv = stackmapdata(stkmap, pcdata)
dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n*sys.PtrSize)))
} else {
bv.n = -1
}
// Dump main body of stack frame.
dumpint(tagStackFrame)
dumpint(uint64(s.sp)) // lowest address in frame
dumpint(uint64(child.depth)) // # of frames deep on the stack
dumpint(uint64(uintptr(unsafe.Pointer(child.sp)))) // sp of child, or 0 if bottom of stack
dumpmemrange(unsafe.Pointer(s.sp), s.fp-s.sp) // frame contents
dumpint(uint64(f.entry))
dumpint(uint64(s.pc))
dumpint(uint64(s.continpc))
name := funcname(f)
if name == "" {
name = "unknown function"
}
dumpstr(name)
// Dump fields in the outargs section
if child.args.n >= 0 {
dumpbv(&child.args, child.argoff)
} else {
// conservative - everything might be a pointer
for off := child.argoff; off < child.argoff+child.arglen; off += sys.PtrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
}
// Dump fields in the local vars section
if stkmap == nil {
// No locals information, dump everything.
for off := child.arglen; off < s.varp-s.sp; off += sys.PtrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
} else if stkmap.n < 0 {
// Locals size information, dump just the locals.
size := uintptr(-stkmap.n)
for off := s.varp - size - s.sp; off < s.varp-s.sp; off += sys.PtrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
} else if stkmap.n > 0 {
// Locals bitmap information, scan just the pointers in
// locals.
dumpbv(&bv, s.varp-uintptr(bv.n)*sys.PtrSize-s.sp)
}
dumpint(fieldKindEol)
// Record arg info for parent.
child.argoff = s.argp - s.fp
child.arglen = s.arglen
child.sp = (*uint8)(unsafe.Pointer(s.sp))
child.depth++
stkmap = (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap != nil {
child.args = stackmapdata(stkmap, pcdata)
} else {
child.args.n = -1
}
return true
}
func dumpgoroutine(gp *g) {
var sp, pc, lr uintptr
if gp.syscallsp != 0 {
sp = gp.syscallsp
pc = gp.syscallpc
lr = 0
} else {
sp = gp.sched.sp
pc = gp.sched.pc
lr = gp.sched.lr
}
dumpint(tagGoroutine)
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(sp))
dumpint(uint64(gp.goid))
dumpint(uint64(gp.gopc))
dumpint(uint64(readgstatus(gp)))
dumpbool(isSystemGoroutine(gp))
dumpbool(false) // isbackground
dumpint(uint64(gp.waitsince))
dumpstr(gp.waitreason)
dumpint(uint64(uintptr(gp.sched.ctxt)))
dumpint(uint64(uintptr(unsafe.Pointer(gp.m))))
dumpint(uint64(uintptr(unsafe.Pointer(gp._defer))))
dumpint(uint64(uintptr(unsafe.Pointer(gp._panic))))
// dump stack
var child childInfo
child.args.n = -1
child.arglen = 0
child.sp = nil
child.depth = 0
gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, dumpframe, noescape(unsafe.Pointer(&child)), 0)
// dump defer & panic records
for d := gp._defer; d != nil; d = d.link {
dumpint(tagDefer)
dumpint(uint64(uintptr(unsafe.Pointer(d))))
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(d.sp))
dumpint(uint64(d.pc))
dumpint(uint64(uintptr(unsafe.Pointer(d.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(d.link))))
}
for p := gp._panic; p != nil; p = p.link {
dumpint(tagPanic)
dumpint(uint64(uintptr(unsafe.Pointer(p))))
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
eface := efaceOf(&p.arg)
dumpint(uint64(uintptr(unsafe.Pointer(eface._type))))
dumpint(uint64(uintptr(unsafe.Pointer(eface.data))))
dumpint(0) // was p->defer, no longer recorded
dumpint(uint64(uintptr(unsafe.Pointer(p.link))))
}
}
func dumpgs() {
// goroutines & stacks
for i := 0; uintptr(i) < allglen; i++ {
gp := allgs[i]
status := readgstatus(gp) // The world is stopped so gp will not be in a scan state.
switch status {
default:
print("runtime: unexpected G.status ", hex(status), "\n")
throw("dumpgs in STW - bad status")
case _Gdead:
// ok
case _Grunnable,
_Gsyscall,
_Gwaiting:
dumpgoroutine(gp)
}
}
}
func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, ot *ptrtype) {
dumpint(tagQueuedFinalizer)
dumpint(uint64(uintptr(obj)))
dumpint(uint64(uintptr(unsafe.Pointer(fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fint))))
dumpint(uint64(uintptr(unsafe.Pointer(ot))))
}
func dumproots() {
// TODO(mwhudson): dump datamask etc from all objects
// data segment
dumpbvtypes(&firstmoduledata.gcdatamask, unsafe.Pointer(firstmoduledata.data))
dumpint(tagData)
dumpint(uint64(firstmoduledata.data))
dumpmemrange(unsafe.Pointer(firstmoduledata.data), firstmoduledata.edata-firstmoduledata.data)
dumpfields(firstmoduledata.gcdatamask)
// bss segment
dumpbvtypes(&firstmoduledata.gcbssmask, unsafe.Pointer(firstmoduledata.bss))
dumpint(tagBSS)
dumpint(uint64(firstmoduledata.bss))
dumpmemrange(unsafe.Pointer(firstmoduledata.bss), firstmoduledata.ebss-firstmoduledata.bss)
dumpfields(firstmoduledata.gcbssmask)
// MSpan.types
for _, s := range mheap_.allspans {
if s.state == _MSpanInUse {
// Finalizers
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
continue
}
spf := (*specialfinalizer)(unsafe.Pointer(sp))
p := unsafe.Pointer(s.base() + uintptr(spf.special.offset))
dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
}
}
}
// Finalizer queue
iterate_finq(finq_callback)
}
// Bit vector of free marks.
// Needs to be as big as the largest number of objects per span.
var freemark [_PageSize / 8]bool
func dumpobjs() {
for _, s := range mheap_.allspans {
if s.state != _MSpanInUse {
continue
}
p := s.base()
size := s.elemsize
n := (s.npages << _PageShift) / size
if n > uintptr(len(freemark)) {
throw("freemark array doesn't have enough entries")
}
for freeIndex := uintptr(0); freeIndex < s.nelems; freeIndex++ {
if s.isFree(freeIndex) {
freemark[freeIndex] = true
}
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
freemark[j] = false
continue
}
dumpobj(unsafe.Pointer(p), size, makeheapobjbv(p, size))
}
}
}
func dumpparams() {
dumpint(tagParams)
x := uintptr(1)
if *(*byte)(unsafe.Pointer(&x)) == 1 {
dumpbool(false) // little-endian ptrs
} else {
dumpbool(true) // big-endian ptrs
}
dumpint(sys.PtrSize)
dumpint(uint64(mheap_.arena_start))
dumpint(uint64(mheap_.arena_used))
dumpstr(sys.GOARCH)
dumpstr(sys.Goexperiment)
dumpint(uint64(ncpu))
}
func itab_callback(tab *itab) {
t := tab._type
dumptype(t)
dumpint(tagItab)
dumpint(uint64(uintptr(unsafe.Pointer(tab))))
dumpint(uint64(uintptr(unsafe.Pointer(t))))
}
func dumpitabs() {
iterate_itabs(itab_callback)
}
func dumpms() {
for mp := allm; mp != nil; mp = mp.alllink {
dumpint(tagOSThread)
dumpint(uint64(uintptr(unsafe.Pointer(mp))))
dumpint(uint64(mp.id))
dumpint(mp.procid)
}
}
func dumpmemstats() {
dumpint(tagMemStats)
dumpint(memstats.alloc)
dumpint(memstats.total_alloc)
dumpint(memstats.sys)
dumpint(memstats.nlookup)
dumpint(memstats.nmalloc)
dumpint(memstats.nfree)
dumpint(memstats.heap_alloc)
dumpint(memstats.heap_sys)
dumpint(memstats.heap_idle)
dumpint(memstats.heap_inuse)
dumpint(memstats.heap_released)
dumpint(memstats.heap_objects)
dumpint(memstats.stacks_inuse)
dumpint(memstats.stacks_sys)
dumpint(memstats.mspan_inuse)
dumpint(memstats.mspan_sys)
dumpint(memstats.mcache_inuse)
dumpint(memstats.mcache_sys)
dumpint(memstats.buckhash_sys)
dumpint(memstats.gc_sys)
dumpint(memstats.other_sys)
dumpint(memstats.next_gc)
dumpint(memstats.last_gc)
dumpint(memstats.pause_total_ns)
for i := 0; i < 256; i++ {
dumpint(memstats.pause_ns[i])
}
dumpint(uint64(memstats.numgc))
}
func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
dumpint(tagMemProf)
dumpint(uint64(uintptr(unsafe.Pointer(b))))
dumpint(uint64(size))
dumpint(uint64(nstk))
for i := uintptr(0); i < nstk; i++ {
pc := stk[i]
f := findfunc(pc)
if f == nil {
var buf [64]byte
n := len(buf)
n--
buf[n] = ')'
if pc == 0 {
n--
buf[n] = '0'
} else {
for pc > 0 {
n--
buf[n] = "0123456789abcdef"[pc&15]
pc >>= 4
}
}
n--
buf[n] = 'x'
n--
buf[n] = '0'
n--
buf[n] = '('
dumpslice(buf[n:])
dumpstr("?")
dumpint(0)
} else {
dumpstr(funcname(f))
if i > 0 && pc > f.entry {
pc--
}
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack Scalararg and ptrarg are not "signal safe". Go code filling them out can be interrupted by a signal, and then the signal handler runs, and if it also ends up in Go code that uses scalararg or ptrarg, now the old values have been smashed. For the pieces of code that do need to run in a signal handler, we introduced onM_signalok, which is really just onM except that the _signalok is meant to convey that the caller asserts that scalarg and ptrarg will be restored to their old values after the call (instead of the usual behavior, zeroing them). Scalararg and ptrarg are also untyped and therefore error-prone. Go code can always pass a closure instead of using scalararg and ptrarg; they were only really necessary for C code. And there's no more C code. For all these reasons, delete scalararg and ptrarg, converting the few remaining references to use closures. Once those are gone, there is no need for a distinction between onM and onM_signalok, so replace both with a single function equivalent to the current onM_signalok (that is, it can be called on any of the curg, g0, and gsignal stacks). The name onM and the phrase 'm stack' are misnomers, because on most system an M has two system stacks: the main thread stack and the signal handling stack. Correct the misnomer by naming the replacement function systemstack. Fix a few references to "M stack" in code. The main motivation for this change is to eliminate scalararg/ptrarg. Rick and I have already seen them cause problems because the calling sequence m.ptrarg[0] = p is a heap pointer assignment, so it gets a write barrier. The write barrier also uses onM, so it has all the same problems as if it were being invoked by a signal handler. We worked around this by saving and restoring the old values and by calling onM_signalok, but there's no point in keeping this nice home for bugs around any longer. This CL also changes funcline to return the file name as a result instead of filling in a passed-in *string. (The *string signature is left over from when the code was written in and called from C.) That's arguably an unrelated change, except that once I had done the ptrarg/scalararg/onM cleanup I started getting false positives about the *string argument escaping (not allowed in package runtime). The compiler is wrong, but the easiest fix is to write the code like Go code instead of like C code. I am a bit worried that the compiler is wrong because of some use of uninitialized memory in the escape analysis. If that's the reason, it will go away when we convert the compiler to Go. (And if not, we'll debug it the next time.) LGTM=khr R=r, khr CC=austin, golang-codereviews, iant, rlh https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
file, line := funcline(f, pc)
dumpstr(file)
dumpint(uint64(line))
}
}
dumpint(uint64(allocs))
dumpint(uint64(frees))
}
func dumpmemprof() {
iterate_memprof(dumpmemprof_callback)
for _, s := range mheap_.allspans {
if s.state != _MSpanInUse {
continue
}
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialProfile {
continue
}
spp := (*specialprofile)(unsafe.Pointer(sp))
p := s.base() + uintptr(spp.special.offset)
dumpint(tagAllocSample)
dumpint(uint64(p))
dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
}
}
}
var dumphdr = []byte("go1.7 heap dump\n")
func mdump() {
// make sure we're done sweeping
for _, s := range mheap_.allspans {
if s.state == _MSpanInUse {
s.ensureSwept()
}
}
memclrNoHeapPointers(unsafe.Pointer(&typecache), unsafe.Sizeof(typecache))
dwrite(unsafe.Pointer(&dumphdr[0]), uintptr(len(dumphdr)))
dumpparams()
dumpitabs()
dumpobjs()
dumpgs()
dumpms()
dumproots()
dumpmemstats()
dumpmemprof()
dumpint(tagEOF)
flush()
}
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack Scalararg and ptrarg are not "signal safe". Go code filling them out can be interrupted by a signal, and then the signal handler runs, and if it also ends up in Go code that uses scalararg or ptrarg, now the old values have been smashed. For the pieces of code that do need to run in a signal handler, we introduced onM_signalok, which is really just onM except that the _signalok is meant to convey that the caller asserts that scalarg and ptrarg will be restored to their old values after the call (instead of the usual behavior, zeroing them). Scalararg and ptrarg are also untyped and therefore error-prone. Go code can always pass a closure instead of using scalararg and ptrarg; they were only really necessary for C code. And there's no more C code. For all these reasons, delete scalararg and ptrarg, converting the few remaining references to use closures. Once those are gone, there is no need for a distinction between onM and onM_signalok, so replace both with a single function equivalent to the current onM_signalok (that is, it can be called on any of the curg, g0, and gsignal stacks). The name onM and the phrase 'm stack' are misnomers, because on most system an M has two system stacks: the main thread stack and the signal handling stack. Correct the misnomer by naming the replacement function systemstack. Fix a few references to "M stack" in code. The main motivation for this change is to eliminate scalararg/ptrarg. Rick and I have already seen them cause problems because the calling sequence m.ptrarg[0] = p is a heap pointer assignment, so it gets a write barrier. The write barrier also uses onM, so it has all the same problems as if it were being invoked by a signal handler. We worked around this by saving and restoring the old values and by calling onM_signalok, but there's no point in keeping this nice home for bugs around any longer. This CL also changes funcline to return the file name as a result instead of filling in a passed-in *string. (The *string signature is left over from when the code was written in and called from C.) That's arguably an unrelated change, except that once I had done the ptrarg/scalararg/onM cleanup I started getting false positives about the *string argument escaping (not allowed in package runtime). The compiler is wrong, but the easiest fix is to write the code like Go code instead of like C code. I am a bit worried that the compiler is wrong because of some use of uninitialized memory in the escape analysis. If that's the reason, it will go away when we convert the compiler to Go. (And if not, we'll debug it the next time.) LGTM=khr R=r, khr CC=austin, golang-codereviews, iant, rlh https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
func writeheapdump_m(fd uintptr) {
_g_ := getg()
casgstatus(_g_.m.curg, _Grunning, _Gwaiting)
_g_.waitreason = "dumping heap"
// Update stats so we can dump them.
// As a side effect, flushes all the MCaches so the MSpan.freelist
// lists contain all the free objects.
updatememstats(nil)
// Set dump file.
dumpfd = fd
// Call dump routine.
mdump()
// Reset dump file.
dumpfd = 0
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
tmpbuf = nil
}
casgstatus(_g_.m.curg, _Gwaiting, _Grunning)
}
// dumpint() the kind & offset of each field in an object.
func dumpfields(bv bitvector) {
dumpbv(&bv, 0)
dumpint(fieldKindEol)
}
// The heap dump reader needs to be able to disambiguate
// Eface entries. So it needs to know every type that might
// appear in such an entry. The following routine accomplishes that.
// TODO(rsc, khr): Delete - no longer possible.
// Dump all the types that appear in the type field of
// any Eface described by this bit vector.
func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
}
func makeheapobjbv(p uintptr, size uintptr) bitvector {
// Extend the temp buffer if necessary.
nptr := size / sys.PtrSize
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
if uintptr(len(tmpbuf)) < nptr/8+1 {
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
n := nptr/8 + 1
p := sysAlloc(n, &memstats.other_sys)
if p == nil {
throw("heapdump: out of memory")
}
tmpbuf = (*[1 << 30]byte)(p)[:n]
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// Convert heap bitmap to pointer bitmap.
for i := uintptr(0); i < nptr/8+1; i++ {
tmpbuf[i] = 0
}
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
runtime: reclaim scan/dead bit in first word With the switch to separate mark bitmaps, the scan/dead bit for the first word of each object is now unused. Reclaim this bit and use it as a scan/dead bit, just like words three and on. The second word is still used for checkmark. This dramatically simplifies heapBitsSetTypeNoScan and hasPointers, since they no longer need different cases for 1, 2, and 3+ word objects. They can instead just manipulate the heap bitmap for the first word and be done with it. In order to enable this, we change heapBitsSetType and runGCProg to always set the scan/dead bit to scan for the first word on every code path. Since these functions only apply to types that have pointers, there's no need to do this conditionally: it's *always* necessary to set the scan bit in the first word. We also change every place that scans an object and checks if there are more pointers. Rather than only checking morePointers if the word is >= 2, we now check morePointers if word != 1 (since that's the checkmark word). Looking forward, we should probably reclaim the checkmark bit, too, but that's going to be quite a bit more work. Tested by setting doubleCheck in heapBitsSetType and running all.bash on both linux/amd64 and linux/386, and by running GOGC=10 all.bash. This particularly improves the FmtFprintf* go1 benchmarks, since they do a large amount of noscan allocation. name old time/op new time/op delta BinaryTree17-12 2.34s ± 1% 2.38s ± 1% +1.70% (p=0.000 n=17+19) Fannkuch11-12 2.09s ± 0% 2.09s ± 1% ~ (p=0.276 n=17+16) FmtFprintfEmpty-12 44.9ns ± 2% 44.8ns ± 2% ~ (p=0.340 n=19+18) FmtFprintfString-12 127ns ± 0% 125ns ± 0% -1.57% (p=0.000 n=16+15) FmtFprintfInt-12 128ns ± 0% 122ns ± 1% -4.45% (p=0.000 n=15+20) FmtFprintfIntInt-12 207ns ± 1% 193ns ± 0% -6.55% (p=0.000 n=19+14) FmtFprintfPrefixedInt-12 197ns ± 1% 191ns ± 0% -2.93% (p=0.000 n=17+18) FmtFprintfFloat-12 263ns ± 0% 248ns ± 1% -5.88% (p=0.000 n=15+19) FmtManyArgs-12 794ns ± 0% 779ns ± 1% -1.90% (p=0.000 n=18+18) GobDecode-12 7.14ms ± 2% 7.11ms ± 1% ~ (p=0.072 n=20+20) GobEncode-12 5.85ms ± 1% 5.82ms ± 1% -0.49% (p=0.000 n=20+20) Gzip-12 218ms ± 1% 215ms ± 1% -1.22% (p=0.000 n=19+19) Gunzip-12 36.8ms ± 0% 36.7ms ± 0% -0.18% (p=0.006 n=18+20) HTTPClientServer-12 77.1µs ± 4% 77.1µs ± 3% ~ (p=0.945 n=19+20) JSONEncode-12 15.6ms ± 1% 15.9ms ± 1% +1.68% (p=0.000 n=18+20) JSONDecode-12 55.2ms ± 1% 53.6ms ± 1% -2.93% (p=0.000 n=17+19) Mandelbrot200-12 4.05ms ± 1% 4.05ms ± 0% ~ (p=0.306 n=17+17) GoParse-12 3.14ms ± 1% 3.10ms ± 1% -1.31% (p=0.000 n=19+18) RegexpMatchEasy0_32-12 69.3ns ± 1% 70.0ns ± 0% +0.89% (p=0.000 n=19+17) RegexpMatchEasy0_1K-12 237ns ± 1% 236ns ± 0% -0.62% (p=0.000 n=19+16) RegexpMatchEasy1_32-12 69.5ns ± 1% 70.3ns ± 1% +1.14% (p=0.000 n=18+17) RegexpMatchEasy1_1K-12 377ns ± 1% 366ns ± 1% -3.03% (p=0.000 n=15+19) RegexpMatchMedium_32-12 107ns ± 1% 107ns ± 2% ~ (p=0.318 n=20+19) RegexpMatchMedium_1K-12 33.8µs ± 3% 33.5µs ± 1% -1.04% (p=0.001 n=20+19) RegexpMatchHard_32-12 1.68µs ± 1% 1.73µs ± 0% +2.50% (p=0.000 n=20+18) RegexpMatchHard_1K-12 50.8µs ± 1% 52.0µs ± 1% +2.50% (p=0.000 n=19+18) Revcomp-12 381ms ± 1% 385ms ± 1% +1.00% (p=0.000 n=17+18) Template-12 64.9ms ± 3% 62.6ms ± 1% -3.55% (p=0.000 n=19+18) TimeParse-12 324ns ± 0% 328ns ± 1% +1.25% (p=0.000 n=18+18) TimeFormat-12 345ns ± 0% 334ns ± 0% -3.31% (p=0.000 n=15+17) [Geo mean] 52.1µs 51.5µs -1.00% Change-Id: I13e74da3193a7f80794c654f944d1f0d60817049 Reviewed-on: https://go-review.googlesource.com/22632 Reviewed-by: Rick Hudson <rlh@golang.org> Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-29 14:51:48 -04:00
if i != 1 && !hbits.morePointers() {
break // end of object
}
if hbits.isPointer() {
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
tmpbuf[i/8] |= 1 << (i % 8)
}
hbits = hbits.next()
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss The bitmaps were 2 bits per pointer because we needed to distinguish scalar, pointer, multiword, and we used the leftover value to distinguish uninitialized from scalar, even though the garbage collector (GC) didn't care. Now that there are no multiword structures from the GC's point of view, cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not. The GC assumes the same layout for stack frames and for the maps describing the global data and bss sections, so change them all in one CL. The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since the 2-bit representation lives (at least for now) in some of the reflect data. Because these stack frame bitmaps are stored directly in the rodata in the binary, this CL reduces the size of the 6g binary by about 1.1%. Performance change is basically a wash, but using less memory, and smaller binaries, and enables other bitmap reductions. name old mean new mean delta BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005) BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001) BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141) BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001) BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095) BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008) BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014) BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364) BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010) BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368) BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484) BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543) BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000) BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023) BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126) BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975) BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153) BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597) BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804) BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881) BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561) BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000) BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000) BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000) BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000) BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019) BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000) BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025) BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000) BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000) BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305) BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000) BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465) BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075) BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337) BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291) BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507) BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313) BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312) BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000) BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007) BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670) BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828) BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392) BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813) BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000) BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000) BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000) BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985) BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320) BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799) BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667) BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001) BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000) BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011) BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185) BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001) BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000) BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000) BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000) BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979) BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777) BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771) BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004) BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004) BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000) BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081) BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027) BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022) BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064) BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001) BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007) BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143) BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278) BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252) BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003) BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581) BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002) BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879) BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257) BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678) BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000) BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000) BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000) BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000) BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000) BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000) BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767) BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347) BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793) Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9 Reviewed-on: https://go-review.googlesource.com/9406 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
return bitvector{int32(i), &tmpbuf[0]}
}