Mirror of https://github.com/golang/go.git
commit 7ba36f4adb (parent 753caecc7e)

runtime: compute size classes statically

No point in computing this info on startup. Compute it at build time.
This lets us spend more time computing & checking the size classes.

Improve the div magic for rounding to the start of an object. We can now
use 32-bit multiplies & shifts, which should help 32-bit platforms.

The static data is <1KB.

The actual size classes are not changed by this CL.

Change-Id: I6450cec7d1b2b4ad31fd3f945f504ed2ec6570e7
Reviewed-on: https://go-review.googlesource.com/32219
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>

6 changed files with 356 additions and 272 deletions
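Illustration (not part of the CL): the "div magic" the message refers to replaces a division by the object size with a shift, a 16-bit multiply, and another shift, chosen so the whole computation fits in 32 bits. The constants below are copied from the `class_to_divmagic` entry for the 208-byte class in the generated file further down; the snippet only demonstrates the identity the runtime relies on.

```go
package main

import "fmt"

func main() {
	// 208-byte size class: spans for it are one 8192-byte page.
	// Generated magic for 208 (class_to_divmagic[14]): shift=4, shift2=13, mul=631.
	const size = 208
	const shift, shift2, mul = 4, 13, 631

	for n := uint32(0); n < 8192; n++ {
		got := (n >> shift) * mul >> shift2 // largest intermediate is 511*631, well within 32 bits
		if got != n/size {
			fmt.Println("mismatch at offset", n)
			return
		}
	}
	fmt.Println("offset/208 == (offset>>4)*631>>13 for every span offset")
}
```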
src/runtime/malloc.go

```diff
@@ -102,28 +102,13 @@ const (
 	mSpanInUse = _MSpanInUse
 
 	concurrentSweep = _ConcurrentSweep
-)
 
-const (
-	_PageShift = 13
 	_PageSize = 1 << _PageShift
 	_PageMask = _PageSize - 1
-)
-
-const (
+
 	// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
 	_64bit = 1 << (^uintptr(0) >> 63) / 2
 
-	// Computed constant. The definition of MaxSmallSize and the
-	// algorithm in msize.go produces some number of different allocation
-	// size classes. NumSizeClasses is that number. It's needed here
-	// because there are static arrays of this length; when msize runs its
-	// size choosing algorithm it double-checks that NumSizeClasses agrees.
-	_NumSizeClasses = 67
-
-	// Tunable constants.
-	_MaxSmallSize = 32 << 10
-
 	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
 	_TinySize      = 16
 	_TinySizeClass = 2
```
```diff
@@ -169,9 +154,9 @@ const (
 	// on the hardware details of the machine. The garbage
 	// collector scales well to 32 cpus.
 	_MaxGcproc = 32
-)
 
-const _MaxArena32 = 1<<32 - 1
+	_MaxArena32 = 1<<32 - 1
+)
 
 // physPageSize is the size in bytes of the OS's physical pages.
 // Mapping and unmapping operations must be done at multiples of
```
```diff
@@ -220,12 +205,17 @@ var physPageSize uintptr
 // if accessed. Used only for debugging the runtime.
 
 func mallocinit() {
-	initSizes()
-
 	if class_to_size[_TinySizeClass] != _TinySize {
 		throw("bad TinySizeClass")
 	}
 
+	testdefersizes()
+
+	// Copy class sizes out for statistics table.
+	for i := range class_to_size {
+		memstats.by_size[i].size = uint32(class_to_size[i])
+	}
+
 	// Check physPageSize.
 	if physPageSize == 0 {
 		// The OS init code failed to fetch the physical page size.
```
src/runtime/mbitmap.go

```diff
@@ -439,7 +439,7 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
 	if s.baseMask != 0 {
 		// optimize for power of 2 sized objects.
 		base = s.base()
-		base = base + (p-base)&s.baseMask
+		base = base + (p-base)&uintptr(s.baseMask)
 		objIndex = (base - s.base()) >> s.divShift
 		// base = p & s.baseMask is faster for small spans,
 		// but doesn't work for large spans.
@@ -448,7 +448,7 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
 		base = s.base()
 		if p-base >= s.elemsize {
 			// n := (p - base) / s.elemsize, using division by multiplication
-			objIndex = uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
+			objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2
 			base += objIndex * s.elemsize
 		}
 	}
```
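Illustration (not part of the CL): for power-of-two classes the lookup avoids the multiply entirely and uses `baseMask`, now stored as a `uint16` because span offsets for these classes fit in 16 bits. The mask value below is copied from the generated table for the 64-byte class; the span base and pointer are made-up values for the demo.

```go
package main

import "fmt"

func main() {
	// 64-byte size class (power of two). The generated table stores
	// baseMask = 1<<16 - 64 = 65472; spans for this class are a single
	// 8192-byte page, so offsets always fit in 16 bits.
	const elemSize = 64
	const baseMask uintptr = 1<<16 - elemSize

	spanBase := uintptr(0x1000000) // hypothetical span start
	p := spanBase + 1234           // hypothetical interior pointer

	// Same expression heapBitsForObject now uses for power-of-two classes.
	objStart := spanBase + (p-spanBase)&baseMask

	fmt.Printf("offset %d rounds down to object start %d (want %d)\n",
		p-spanBase, objStart-spanBase, (p-spanBase)/elemSize*elemSize)
}
```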
src/runtime/mheap.go

```diff
@@ -234,7 +234,8 @@ type mspan struct {
 	// h->sweepgen is incremented by 2 after every GC
 
 	sweepgen    uint32
-	divMul      uint32   // for divide by elemsize - divMagic.mul
+	divMul      uint16   // for divide by elemsize - divMagic.mul
+	baseMask    uint16   // if non-0, elemsize is a power of 2, & this will get object allocation base
 	allocCount  uint16   // capacity - number of objects in freelist
 	sizeclass   uint8    // size class
 	incache     bool     // being used by an mcache
@@ -248,7 +249,6 @@ type mspan struct {
 	limit       uintptr  // end of data in span
 	speciallock mutex    // guards specials list
 	specials    *special // linked list of special records sorted by offset.
-	baseMask    uintptr  // if non-0, elemsize is a power of 2, & this will get object allocation base
 }
 
 func (s *mspan) base() uintptr {
```
src/runtime/mksizeclasses.go (new file, 309 lines)

```go
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// Generate tables for small malloc size classes.
//
// See malloc.go for overview.
//
// The size classes are chosen so that rounding an allocation
// request up to the next size class wastes at most 12.5% (1.125x).
//
// Each size class has its own page count that gets allocated
// and chopped up when new objects of the size class are needed.
// That page count is chosen so that chopping up the run of
// pages into objects of the given size wastes at most 12.5% (1.125x)
// of the memory. It is not necessary that the cutoff here be
// the same as above.
//
// The two sources of waste multiply, so the worst possible case
// for the above constraints would be that allocations of some
// size might have a 26.6% (1.266x) overhead.
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
//
// TODO(rsc): Compute max waste for any given size.

package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io"
	"io/ioutil"
	"log"
	"os"
)

// Generate msize.go

var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")

func main() {
	flag.Parse()

	var b bytes.Buffer
	fmt.Fprintln(&b, "// AUTO-GENERATED by mksizeclasses.go; DO NOT EDIT")
	fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go")
	fmt.Fprintln(&b)
	fmt.Fprintln(&b, "package runtime")
	classes := makeClasses()

	printClasses(&b, classes)

	out, err := format.Source(b.Bytes())
	if err != nil {
		log.Fatal(err)
	}
	if *stdout {
		_, err = os.Stdout.Write(out)
	} else {
		err = ioutil.WriteFile("sizeclasses.go", out, 0666)
	}
	if err != nil {
		log.Fatal(err)
	}
}

const (
	// Constants that we use and will transfer to the runtime.
	maxSmallSize = 32 << 10
	smallSizeDiv = 8
	smallSizeMax = 1024
	largeSizeDiv = 128
	pageShift    = 13

	// Derived constants.
	pageSize = 1 << pageShift
)

type class struct {
	size   int // max size
	npages int // number of pages

	mul    int
	shift  uint
	shift2 uint
	mask   int
}

func powerOfTwo(x int) bool {
	return x != 0 && x&(x-1) == 0
}

func makeClasses() []class {
	var classes []class

	classes = append(classes, class{}) // class #0 is a dummy entry

	align := 8
	for size := align; size <= maxSmallSize; size += align {
		if powerOfTwo(size) { // bump alignment once in a while
			if size >= 2048 {
				align = 256
			} else if size >= 128 {
				align = size / 8
			} else if size >= 16 {
				align = 16 // required for x86 SSE instructions, if we want to use them
			}
		}
		if !powerOfTwo(align) {
			panic("incorrect alignment")
		}

		// Make the allocnpages big enough that
		// the leftover is less than 1/8 of the total,
		// so wasted space is at most 12.5%.
		allocsize := pageSize
		for allocsize%size > allocsize/8 {
			allocsize += pageSize
		}
		npages := allocsize / pageSize

		// If the previous sizeclass chose the same
		// allocation size and fit the same number of
		// objects into the page, we might as well
		// use just this size instead of having two
		// different sizes.
		if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size {
			classes[len(classes)-1].size = size
			continue
		}
		classes = append(classes, class{size: size, npages: npages})
	}

	// Increase object sizes if we can fit the same number of larger objects
	// into the same number of pages. For example, we choose size 8448 above
	// with 6 objects in 7 pages. But we can well use object size 9472,
	// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
	// We need to preserve at least largeSizeDiv alignment otherwise
	// sizeToClass won't work.
	for i := range classes {
		if i == 0 {
			continue
		}
		c := &classes[i]
		psize := c.npages * pageSize
		new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1)
		if new_size > c.size {
			c.size = new_size
		}
	}

	if len(classes) != 67 {
		panic("number of size classes has changed")
	}

	for i := range classes {
		computeDivMagic(&classes[i])
	}

	return classes
}

// computeDivMagic computes some magic constants to implement
// the division required to compute object number from span offset.
// n / c.size is implemented as n >> c.shift * c.mul >> c.shift2
// for all 0 <= n < c.npages * pageSize
func computeDivMagic(c *class) {
	// divisor
	d := c.size
	if d == 0 {
		return
	}

	// maximum input value for which the formula needs to work.
	max := c.npages*pageSize - 1

	if powerOfTwo(d) {
		// If the size is a power of two, heapBitsForObject can divide even faster by masking.
		// Compute this mask.
		if max >= 1<<16 {
			panic("max too big for power of two size")
		}
		c.mask = 1<<16 - d
	}

	// Compute pre-shift by factoring power of 2 out of d.
	for d%2 == 0 {
		c.shift++
		d >>= 1
		max >>= 1
	}

	// Find the smallest k that works.
	// A small k allows us to fit the math required into 32 bits
	// so we can use 32-bit multiplies and shifts on 32-bit platforms.
nextk:
	for k := uint(0); ; k++ {
		mul := (int(1)<<k + d - 1) / d // ⌈2^k / d⌉

		// Test to see if mul works.
		for n := 0; n <= max; n++ {
			if n*mul>>k != n/d {
				continue nextk
			}
		}
		if mul >= 1<<16 {
			panic("mul too big")
		}
		if uint64(mul)*uint64(max) >= 1<<32 {
			panic("mul*max too big")
		}
		c.mul = mul
		c.shift2 = k
		break
	}

	// double-check.
	for n := 0; n <= max; n++ {
		if n*c.mul>>c.shift2 != n/d {
			fmt.Printf("d=%d max=%d mul=%d shift2=%d n=%d\n", d, max, c.mul, c.shift2, n)
			panic("bad multiply magic")
		}
		// Also check the exact computations that will be done by the runtime,
		// for both 32 and 64 bit operations.
		if uint32(n)*uint32(c.mul)>>uint8(c.shift2) != uint32(n/d) {
			fmt.Printf("d=%d max=%d mul=%d shift2=%d n=%d\n", d, max, c.mul, c.shift2, n)
			panic("bad 32-bit multiply magic")
		}
		if uint64(n)*uint64(c.mul)>>uint8(c.shift2) != uint64(n/d) {
			fmt.Printf("d=%d max=%d mul=%d shift2=%d n=%d\n", d, max, c.mul, c.shift2, n)
			panic("bad 64-bit multiply magic")
		}
	}
}

func printClasses(w io.Writer, classes []class) {
	fmt.Fprintln(w, "const (")
	fmt.Fprintf(w, "_MaxSmallSize = %d\n", maxSmallSize)
	fmt.Fprintf(w, "smallSizeDiv = %d\n", smallSizeDiv)
	fmt.Fprintf(w, "smallSizeMax = %d\n", smallSizeMax)
	fmt.Fprintf(w, "largeSizeDiv = %d\n", largeSizeDiv)
	fmt.Fprintf(w, "_NumSizeClasses = %d\n", len(classes))
	fmt.Fprintf(w, "_PageShift = %d\n", pageShift)
	fmt.Fprintln(w, ")")

	fmt.Fprint(w, "var class_to_size = [_NumSizeClasses]uint16 {")
	for _, c := range classes {
		fmt.Fprintf(w, "%d,", c.size)
	}
	fmt.Fprintln(w, "}")

	fmt.Fprint(w, "var class_to_allocnpages = [_NumSizeClasses]uint8 {")
	for _, c := range classes {
		fmt.Fprintf(w, "%d,", c.npages)
	}
	fmt.Fprintln(w, "}")

	fmt.Fprintln(w, "type divMagic struct {")
	fmt.Fprintln(w, " shift uint8")
	fmt.Fprintln(w, " shift2 uint8")
	fmt.Fprintln(w, " mul uint16")
	fmt.Fprintln(w, " baseMask uint16")
	fmt.Fprintln(w, "}")
	fmt.Fprint(w, "var class_to_divmagic = [_NumSizeClasses]divMagic {")
	for _, c := range classes {
		fmt.Fprintf(w, "{%d,%d,%d,%d},", c.shift, c.shift2, c.mul, c.mask)
	}
	fmt.Fprintln(w, "}")

	// map from size to size class, for small sizes.
	sc := make([]int, smallSizeMax/smallSizeDiv+1)
	for i := range sc {
		size := i * smallSizeDiv
		for j, c := range classes {
			if c.size >= size {
				sc[i] = j
				break
			}
		}
	}
	fmt.Fprint(w, "var size_to_class8 = [smallSizeMax/smallSizeDiv+1]uint8 {")
	for _, v := range sc {
		fmt.Fprintf(w, "%d,", v)
	}
	fmt.Fprintln(w, "}")

	// map from size to size class, for large sizes.
	sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
	for i := range sc {
		size := smallSizeMax + i*largeSizeDiv
		for j, c := range classes {
			if c.size >= size {
				sc[i] = j
				break
			}
		}
	}
	fmt.Fprint(w, "var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv+1]uint8 {")
	for _, v := range sc {
		fmt.Fprintf(w, "%d,", v)
	}
	fmt.Fprintln(w, "}")
}
```
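The generator is kept out of the build with `// +build ignore` and is run via `go generate` (or `go run mksizeclasses.go -stdout` to inspect its output). Illustration (not part of the CL): the "leftover is less than 1/8" rule above picks the span size for each candidate object size. Worked through for the 3456-byte class (class 42, whose generated `class_to_allocnpages` entry is 3): one page leaves 1280 bytes over, two pages leave 2560, but three pages (24576 bytes) hold 7 objects with only a 384-byte tail, about 1.6%.

```go
package main

import "fmt"

func main() {
	const pageSize = 8192

	// npagesFor mirrors the allocsize loop above: grow the span one page
	// at a time until the unused tail is at most 1/8 of the span.
	npagesFor := func(size int) int {
		allocsize := pageSize
		for allocsize%size > allocsize/8 {
			allocsize += pageSize
		}
		return allocsize / pageSize
	}

	fmt.Println(npagesFor(3456)) // prints 3
}
```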
src/runtime/msize.go

```diff
@@ -5,60 +5,22 @@
 // Malloc small size classes.
 //
 // See malloc.go for overview.
-//
-// The size classes are chosen so that rounding an allocation
-// request up to the next size class wastes at most 12.5% (1.125x).
-//
-// Each size class has its own page count that gets allocated
-// and chopped up when new objects of the size class are needed.
-// That page count is chosen so that chopping up the run of
-// pages into objects of the given size wastes at most 12.5% (1.125x)
-// of the memory. It is not necessary that the cutoff here be
-// the same as above.
-//
-// The two sources of waste multiply, so the worst possible case
-// for the above constraints would be that allocations of some
-// size might have a 26.6% (1.266x) overhead.
-// In practice, only one of the wastes comes into play for a
-// given size (sizes < 512 waste mainly on the round-up,
-// sizes > 512 waste mainly on the page chopping).
-//
-// TODO(rsc): Compute max waste for any given size.
+// See also mksizeclasses.go for how we decide what size classes to use.
 
 package runtime
 
-// Size classes. Computed and initialized by InitSizes.
-//
-// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
+// sizeToClass(0 <= n <= MaxSmallSize) returns the size class,
 // 1 <= sizeclass < NumSizeClasses, for n.
 // Size class 0 is reserved to mean "not small".
 //
-// class_to_size[i] = largest size in class i
-// class_to_allocnpages[i] = number of pages to allocate when
-//	making new objects in class i
-
-// The SizeToClass lookup is implemented using two arrays,
+// The sizeToClass lookup is implemented using two arrays,
 // one mapping sizes <= 1024 to their class and one mapping
 // sizes >= 1024 and <= MaxSmallSize to their class.
 // All objects are 8-aligned, so the first array is indexed by
 // the size divided by 8 (rounded up). Objects >= 1024 bytes
 // are 128-aligned, so the second array is indexed by the
-// size divided by 128 (rounded up). The arrays are filled in
-// by InitSizes.
-
-const (
-	smallSizeDiv = 8
-	smallSizeMax = 1024
-	largeSizeDiv = 128
-)
-
-var class_to_size [_NumSizeClasses]uint32
-var class_to_allocnpages [_NumSizeClasses]uint32
-var class_to_divmagic [_NumSizeClasses]divMagic
-
-var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8
-var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8
+// size divided by 128 (rounded up). The arrays are constants
+// in sizeclass.go generated by mksizeclass.go.
 
 func sizeToClass(size uint32) uint32 {
 	if size > _MaxSmallSize {
 		throw("invalid size")
```
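Illustration (not part of the CL): the two-array lookup described in the comment above turns the size-to-class mapping into one table read. The short table excerpts below are copied from the generated sizeclasses.go further down; the full arrays live there.

```go
package main

import "fmt"

func main() {
	const smallSizeDiv = 8

	// Leading entries of the generated tables (excerpts for the demo).
	sizeToClass8 := []uint8{0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14}
	classToSize := []uint16{0, 8, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208}

	size := 200
	class := sizeToClass8[(size+smallSizeDiv-1)/smallSizeDiv] // index 25 -> class 14
	fmt.Printf("a %d-byte request uses class %d and is rounded up to %d bytes\n",
		size, class, classToSize[class])
}
```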
```diff
@@ -69,147 +31,6 @@ func sizeToClass(size uint32) uint32 {
 	return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv])
 }
 
-func initSizes() {
-	// Initialize the runtime·class_to_size table (and choose class sizes in the process).
-	class_to_size[0] = 0
-	sizeclass := 1 // 0 means no class
-	align := 8
-	for size := align; size <= _MaxSmallSize; size += align {
-		if size&(size-1) == 0 { // bump alignment once in a while
-			if size >= 2048 {
-				align = 256
-			} else if size >= 128 {
-				align = size / 8
-			} else if size >= 16 {
-				align = 16 // required for x86 SSE instructions, if we want to use them
-			}
-		}
-		if align&(align-1) != 0 {
-			throw("incorrect alignment")
-		}
-
-		// Make the allocnpages big enough that
-		// the leftover is less than 1/8 of the total,
-		// so wasted space is at most 12.5%.
-		allocsize := _PageSize
-		for allocsize%size > allocsize/8 {
-			allocsize += _PageSize
-		}
-		npages := allocsize >> _PageShift
-
-		// If the previous sizeclass chose the same
-		// allocation size and fit the same number of
-		// objects into the page, we might as well
-		// use just this size instead of having two
-		// different sizes.
-		if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
-			class_to_size[sizeclass-1] = uint32(size)
-			continue
-		}
-
-		class_to_allocnpages[sizeclass] = uint32(npages)
-		class_to_size[sizeclass] = uint32(size)
-		sizeclass++
-	}
-	if sizeclass != _NumSizeClasses {
-		print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
-		throw("bad NumSizeClasses")
-	}
-
-	// Increase object sizes if we can fit the same number of larger objects
-	// into the same number of pages. For example, we choose size 8448 above
-	// with 6 objects in 7 pages. But we can well use object size 9472,
-	// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
-	// We need to preserve at least largeSizeDiv alignment otherwise
-	// sizeToClass won't work.
-	for i := 1; i < _NumSizeClasses; i++ {
-		npages := class_to_allocnpages[i]
-		psize := npages * _PageSize
-		size := class_to_size[i]
-		new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1)
-		if new_size > size {
-			class_to_size[i] = new_size
-		}
-	}
-
-	// Check maxObjsPerSpan => number of objects invariant.
-	for i, size := range class_to_size {
-		if i != 0 && class_to_size[i-1] >= size {
-			throw("non-monotonic size classes")
-		}
-
-		if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
-			throw("span contains too many objects")
-		}
-		if size == 0 && i != 0 {
-			throw("size is 0 but class is not 0")
-		}
-	}
-	// Initialize the size_to_class tables.
-	nextsize := 0
-	for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
-		for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
-			size_to_class8[nextsize/8] = uint8(sizeclass)
-		}
-		if nextsize >= 1024 {
-			for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
-				size_to_class128[(nextsize-1024)/128] = uint8(sizeclass)
-			}
-		}
-	}
-
-	// Double-check SizeToClass.
-	if false {
-		for n := uint32(0); n < _MaxSmallSize; n++ {
-			sizeclass := sizeToClass(n)
-			if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
-				print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
-				print("incorrect SizeToClass\n")
-				goto dump
-			}
-			if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
-				print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
-				print("SizeToClass too big\n")
-				goto dump
-			}
-		}
-	}
-
-	testdefersizes()
-
-	// Copy out for statistics table.
-	for i := 0; i < len(class_to_size); i++ {
-		memstats.by_size[i].size = uint32(class_to_size[i])
-	}
-
-	for i := 1; i < len(class_to_size); i++ {
-		class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i]))
-	}
-
-	return
-
-dump:
-	if true {
-		print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
-		print("runtime·class_to_size:")
-		for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
-			print(" ", class_to_size[sizeclass], "")
-		}
-		print("\n\n")
-		print("runtime: size_to_class8:")
-		for i := 0; i < len(size_to_class8); i++ {
-			print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
-		}
-		print("\n")
-		print("runtime: size_to_class128:")
-		for i := 0; i < len(size_to_class128); i++ {
-			print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
-		}
-		print("\n")
-	}
-	throw("InitSizes failed")
-}
-
 // Returns size of the memory block that mallocgc will allocate if you ask for the size.
 func roundupsize(size uintptr) uintptr {
 	if size < _MaxSmallSize {
```
```diff
@@ -224,66 +45,3 @@ func roundupsize(size uintptr) uintptr {
 	}
 	return round(size, _PageSize)
 }
-
-// divMagic holds magic constants to implement division
-// by a particular constant as a shift, multiply, and shift.
-// That is, given
-//	m = computeMagic(d)
-// then
-//	n/d == ((n>>m.shift) * m.mul) >> m.shift2
-//
-// The magic computation picks m such that
-//	d = d₁*d₂
-//	d₂= 2^m.shift
-//	m.mul = ⌈2^m.shift2 / d₁⌉
-//
-// The magic computation here is tailored for malloc block sizes
-// and does not handle arbitrary d correctly. Malloc block sizes d are
-// always even, so the first shift implements the factors of 2 in d
-// and then the mul and second shift implement the odd factor
-// that remains. Because the first shift divides n by at least 2 (actually 8)
-// before the multiply gets involved, the huge corner cases that
-// require additional adjustment are impossible, so the usual
-// fixup is not needed.
-//
-// For more details see Hacker's Delight, Chapter 10, and
-// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
-// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
-type divMagic struct {
-	shift    uint8
-	mul      uint32
-	shift2   uint8
-	baseMask uintptr
-}
-
-func computeDivMagic(d uint32) divMagic {
-	var m divMagic
-
-	// If the size is a power of two, heapBitsForObject can divide even faster by masking.
-	// Compute this mask.
-	if d&(d-1) == 0 {
-		// It is a power of 2 (assuming dinptr != 1)
-		m.baseMask = ^(uintptr(d) - 1)
-	} else {
-		m.baseMask = 0
-	}
-
-	// Compute pre-shift by factoring power of 2 out of d.
-	for d&1 == 0 {
-		m.shift++
-		d >>= 1
-	}
-
-	// Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int.
-	// This is always a good enough approximation.
-	// We could use smaller k for some divisors but there's no point.
-	k := uint8(63)
-	d64 := uint64(d)
-	for ((1<<k)+d64-1)/d64 >= 1<<32 {
-		k--
-	}
-	m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉
-	m.shift2 = k
-
-	return m
-}
```
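Illustration (not part of the CL; the arithmetic here is my own, but it matches the constants in the generated table): the removed code picked the largest k for which ⌈2^k/d₁⌉ still fits in 32 bits, which forces a 64-bit product at runtime. The new generator instead searches for the smallest k that is exact over a span, so mul fits in 16 bits and the product in 32. For the 48-byte class (d = 3<<4):

```go
package main

import "fmt"

func main() {
	const d1 = 3 // odd factor of 48 after shifting out 2^4

	// Old scheme: largest k with ceil(2^k/d1) below 2^32 gives k=33,
	// mul=2863311531, so the runtime needed a 64-bit multiply.
	oldK := uint(33)
	oldMul := (uint64(1)<<oldK + d1 - 1) / d1
	fmt.Println("old:", oldK, oldMul) // old: 33 2863311531

	// New scheme: smallest k that divides correctly for every span offset.
	// Spans of the 48-byte class are one 8192-byte page, so after the
	// pre-shift the largest input is 8191>>4 = 511.
	const max = 8191 >> 4
	newK, newMul := uint(0), uint64(0)
search:
	for k := uint(0); ; k++ {
		mul := (uint64(1)<<k + d1 - 1) / d1
		for n := uint64(0); n <= max; n++ {
			if n*mul>>k != n/d1 {
				continue search
			}
		}
		newK, newMul = k, mul
		break
	}
	fmt.Println("new:", newK, newMul) // new: 9 171 -- matches {4, 9, 171, 0} in the table
}
```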
src/runtime/sizeclasses.go (new file, 27 lines, generated)

```go
// AUTO-GENERATED by mksizeclasses.go; DO NOT EDIT
//go:generate go run mksizeclasses.go

package runtime

const (
	_MaxSmallSize   = 32768
	smallSizeDiv    = 8
	smallSizeMax    = 1024
	largeSizeDiv    = 128
	_NumSizeClasses = 67
	_PageShift      = 13
)

var class_to_size = [_NumSizeClasses]uint16{0, 8, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576, 27264, 28672, 32768}
var class_to_allocnpages = [_NumSizeClasses]uint8{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 3, 1, 3, 2, 3, 4, 5, 6, 1, 7, 6, 5, 4, 3, 5, 7, 2, 9, 7, 5, 8, 3, 10, 7, 4}

type divMagic struct {
	shift    uint8
	shift2   uint8
	mul      uint16
	baseMask uint16
}

var class_to_divmagic = [_NumSizeClasses]divMagic{{0, 0, 0, 0}, {3, 0, 1, 65528}, {4, 0, 1, 65520}, {5, 0, 1, 65504}, {4, 9, 171, 0}, {6, 0, 1, 65472}, {4, 10, 205, 0}, {5, 9, 171, 0}, {4, 11, 293, 0}, {7, 0, 1, 65408}, {4, 9, 57, 0}, {5, 10, 205, 0}, {4, 12, 373, 0}, {6, 7, 43, 0}, {4, 13, 631, 0}, {5, 11, 293, 0}, {4, 13, 547, 0}, {8, 0, 1, 65280}, {5, 9, 57, 0}, {6, 9, 103, 0}, {5, 12, 373, 0}, {7, 7, 43, 0}, {5, 10, 79, 0}, {6, 10, 147, 0}, {5, 11, 137, 0}, {9, 0, 1, 65024}, {6, 9, 57, 0}, {7, 6, 13, 0}, {6, 11, 187, 0}, {8, 5, 11, 0}, {7, 8, 37, 0}, {10, 0, 1, 64512}, {7, 9, 57, 0}, {8, 6, 13, 0}, {7, 11, 187, 0}, {9, 5, 11, 0}, {8, 8, 37, 0}, {11, 0, 1, 63488}, {8, 9, 57, 0}, {7, 10, 49, 0}, {10, 5, 11, 0}, {7, 10, 41, 0}, {7, 9, 19, 0}, {12, 0, 1, 61440}, {8, 9, 27, 0}, {8, 10, 49, 0}, {11, 5, 11, 0}, {7, 13, 161, 0}, {7, 13, 155, 0}, {8, 9, 19, 0}, {13, 0, 1, 57344}, {8, 12, 111, 0}, {9, 9, 27, 0}, {11, 6, 13, 0}, {7, 14, 193, 0}, {12, 3, 3, 0}, {8, 13, 155, 0}, {11, 8, 37, 0}, {14, 0, 1, 49152}, {11, 8, 29, 0}, {7, 13, 55, 0}, {12, 5, 7, 0}, {8, 14, 193, 0}, {13, 3, 3, 0}, {7, 14, 77, 0}, {12, 7, 19, 0}, {15, 0, 1, 32768}}
var size_to_class8 = [smallSizeMax/smallSizeDiv + 1]uint8{0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31}
var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8{31, 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 39, 40, 40, 40, 41, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 47, 47, 47, 48, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66}
```
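Illustration (not part of the CL): the generated rows preserve the "at most 12.5% page-chopping waste" property described in mksizeclasses.go's header comment. A quick spot check over a few (size, pages) pairs copied from the two arrays above:

```go
package main

import "fmt"

func main() {
	const pageSize = 1 << 13 // _PageShift = 13

	// (class_to_size[i], class_to_allocnpages[i]) for classes 42, 51, 64, 66.
	rows := []struct{ size, pages int }{
		{3456, 3}, {9472, 7}, {27264, 10}, {32768, 4},
	}
	for _, r := range rows {
		span := r.pages * pageSize
		tail := span % r.size
		fmt.Printf("size %5d: %2d-page span, tail %4d bytes (%.1f%%, limit 12.5%%)\n",
			r.size, r.pages, tail, 100*float64(tail)/float64(span))
	}
}
```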