go/src/runtime/sizeclasses.go

100 lines
9.2 KiB
Go
Raw Normal View History

// Code generated by mksizeclasses.go; DO NOT EDIT.
//go:generate go run mksizeclasses.go
package runtime
// class bytes/obj bytes/span objects tail waste max waste min align
// 1 8 8192 1024 0 87.50% 8
// 2 16 8192 512 0 43.75% 16
// 3 24 8192 341 8 29.24% 8
// 4 32 8192 256 0 21.88% 32
// 5 48 8192 170 32 31.52% 16
// 6 64 8192 128 0 23.44% 64
// 7 80 8192 102 32 19.07% 16
// 8 96 8192 85 32 15.95% 32
// 9 112 8192 73 16 13.56% 16
// 10 128 8192 64 0 11.72% 128
// 11 144 8192 56 128 11.82% 16
// 12 160 8192 51 32 9.73% 32
// 13 176 8192 46 96 9.59% 16
// 14 192 8192 42 128 9.25% 64
// 15 208 8192 39 80 8.12% 16
// 16 224 8192 36 128 8.15% 32
// 17 240 8192 34 32 6.62% 16
// 18 256 8192 32 0 5.86% 256
// 19 288 8192 28 128 12.16% 32
// 20 320 8192 25 192 11.80% 64
// 21 352 8192 23 96 9.88% 32
// 22 384 8192 21 128 9.51% 128
// 23 416 8192 19 288 10.71% 32
// 24 448 8192 18 128 8.37% 64
// 25 480 8192 17 32 6.82% 32
// 26 512 8192 16 0 6.05% 512
// 27 576 8192 14 128 12.33% 64
// 28 640 8192 12 512 15.48% 128
// 29 704 8192 11 448 13.93% 64
// 30 768 8192 10 512 13.94% 256
// 31 896 8192 9 128 15.52% 128
// 32 1024 8192 8 0 12.40% 1024
// 33 1152 8192 7 128 12.41% 128
// 34 1280 8192 6 512 15.55% 256
// 35 1408 16384 11 896 14.00% 128
// 36 1536 8192 5 512 14.00% 512
// 37 1792 16384 9 256 15.57% 256
// 38 2048 8192 4 0 12.45% 2048
// 39 2304 16384 7 256 12.46% 256
// 40 2688 8192 3 128 15.59% 128
// 41 3072 24576 8 0 12.47% 1024
// 42 3200 16384 5 384 6.22% 128
// 43 3456 24576 7 384 8.83% 128
// 44 4096 8192 2 0 15.60% 4096
// 45 4864 24576 5 256 16.65% 256
// 46 5376 16384 3 256 10.92% 256
// 47 6144 24576 4 0 12.48% 2048
// 48 6528 32768 5 128 6.23% 128
// 49 6784 40960 6 256 4.36% 128
// 50 6912 49152 7 768 3.37% 256
// 51 8192 8192 1 0 15.61% 8192
// 52 9472 57344 6 512 14.28% 256
// 53 9728 49152 5 512 3.64% 512
// 54 10240 40960 4 0 4.99% 2048
// 55 10880 32768 3 128 6.24% 128
// 56 12288 24576 2 0 11.45% 4096
// 57 13568 40960 3 256 9.99% 256
// 58 14336 57344 4 0 5.35% 2048
// 59 16384 16384 1 0 12.49% 8192
// 60 18432 73728 4 0 11.11% 2048
// 61 19072 57344 3 128 3.57% 128
// 62 20480 40960 2 0 6.87% 4096
// 63 21760 65536 3 256 6.25% 256
// 64 24576 24576 1 0 11.45% 8192
// 65 27264 81920 3 128 10.00% 128
// 66 28672 57344 2 0 4.91% 4096
// 67 32768 32768 1 0 12.50% 8192
// alignment bits min obj size
// 8 3 8
// 16 4 32
// 32 5 256
// 64 6 512
// 128 7 768
// 4096 12 28672
// 8192 13 32768
const (
minHeapAlign = 8
_MaxSmallSize = 32768
smallSizeDiv = 8
smallSizeMax = 1024
largeSizeDiv = 128
runtime: add 24 byte allocation size class This CL introduces a 24 byte allocation size class which fits 3 pointers on 64 bit and 6 pointers on 32 bit architectures. Notably this new size class fits a slice header on 64 bit architectures exactly while previously a 32 byte size class would have been used for allocating a slice header on the heap. The main complexity added with this CL is that heapBitsSetType needs to handle objects that aren't 16-byte aligned but contain more than a single pointer on 64-bit architectures. Due to having a non 16 byte aligned size class on 32 bit a h.shift of 2 is now possible which means a heap bitmap byte might only be partially written. Due to this already having been possible on 64 bit before the heap bitmap code only needed minor adjustments for 32 bit doublecheck code paths. Note that this CL changes the slice capacity allocated by append for slice growth to a target capacity of 17 to 24 bytes. On 64 bit architectures the capacity of the slice returned by append([]byte{}, make([]byte, 24)...)) is 32 bytes before and 24 bytes after this CL. Depending on allocation patterns of the specific Go program this can increase the number of total alloctions as subsequent appends to the slice can trigger slice growth earlier than before. On the other side if the slice is never appended to again above its capacity this will lower heap usage by 8 bytes. This CL changes the set of size classes reported in the runtime.MemStats.BySize array due to it being limited to a total of 61 size classes. The new 24 byte size class is now included and the 20480 byte size class is not included anymore. Fixes #8885 name old time/op new time/op delta Template 196ms ± 3% 194ms ± 2% ~ (p=0.247 n=10+10) Unicode 85.6ms ±16% 88.1ms ± 1% ~ (p=0.165 n=10+10) GoTypes 673ms ± 2% 668ms ± 2% ~ (p=0.258 n=9+9) Compiler 3.14s ± 6% 3.08s ± 1% ~ (p=0.243 n=10+9) SSA 6.82s ± 1% 6.76s ± 1% -0.87% (p=0.006 n=9+10) Flate 128ms ± 7% 127ms ± 3% ~ (p=0.739 n=10+10) GoParser 154ms ± 3% 153ms ± 4% ~ (p=0.730 n=9+9) Reflect 404ms ± 1% 412ms ± 4% +1.99% (p=0.022 n=9+10) Tar 172ms ± 4% 170ms ± 4% ~ (p=0.065 n=10+9) XML 231ms ± 4% 230ms ± 3% ~ (p=0.912 n=10+10) LinkCompiler 341ms ± 1% 339ms ± 1% ~ (p=0.243 n=9+10) ExternalLinkCompiler 1.72s ± 1% 1.72s ± 1% ~ (p=0.661 n=9+10) LinkWithoutDebugCompiler 221ms ± 2% 221ms ± 2% ~ (p=0.529 n=10+10) StdCmd 18.4s ± 3% 18.2s ± 1% ~ (p=0.515 n=10+8) name old user-time/op new user-time/op delta Template 238ms ± 4% 243ms ± 6% ~ (p=0.661 n=9+10) Unicode 116ms ± 6% 113ms ± 3% -3.37% (p=0.035 n=9+10) GoTypes 854ms ± 2% 848ms ± 2% ~ (p=0.604 n=9+10) Compiler 4.10s ± 1% 4.11s ± 1% ~ (p=0.481 n=8+9) SSA 9.49s ± 1% 9.41s ± 1% -0.92% (p=0.001 n=9+10) Flate 149ms ± 6% 151ms ± 7% ~ (p=0.481 n=10+10) GoParser 189ms ± 2% 190ms ± 2% ~ (p=0.497 n=9+10) Reflect 511ms ± 2% 508ms ± 2% ~ (p=0.211 n=9+10) Tar 215ms ± 4% 212ms ± 3% ~ (p=0.105 n=10+10) XML 288ms ± 2% 288ms ± 2% ~ (p=0.971 n=10+10) LinkCompiler 559ms ± 4% 557ms ± 1% ~ (p=0.968 n=9+10) ExternalLinkCompiler 1.78s ± 1% 1.77s ± 1% ~ (p=0.055 n=8+10) LinkWithoutDebugCompiler 245ms ± 3% 245ms ± 2% ~ (p=0.684 n=10+10) name old alloc/op new alloc/op delta Template 34.8MB ± 0% 34.4MB ± 0% -0.95% (p=0.000 n=9+10) Unicode 28.6MB ± 0% 28.3MB ± 0% -0.95% (p=0.000 n=10+10) GoTypes 115MB ± 0% 114MB ± 0% -1.02% (p=0.000 n=10+9) Compiler 554MB ± 0% 549MB ± 0% -0.86% (p=0.000 n=9+10) SSA 1.28GB ± 0% 1.27GB ± 0% -0.83% (p=0.000 n=10+10) Flate 21.8MB ± 0% 21.6MB ± 0% -0.87% (p=0.000 n=8+10) GoParser 26.7MB ± 0% 26.4MB ± 0% -0.97% (p=0.000 n=10+9) Reflect 75.0MB ± 0% 74.1MB ± 0% -1.18% (p=0.000 n=10+10) Tar 32.6MB ± 0% 32.3MB ± 0% -0.94% (p=0.000 n=10+7) XML 41.5MB ± 0% 41.2MB ± 0% -0.90% (p=0.000 n=10+8) LinkCompiler 105MB ± 0% 104MB ± 0% -0.94% (p=0.000 n=10+10) ExternalLinkCompiler 153MB ± 0% 152MB ± 0% -0.69% (p=0.000 n=10+10) LinkWithoutDebugCompiler 63.7MB ± 0% 63.6MB ± 0% -0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 336k ± 0% 336k ± 0% +0.02% (p=0.002 n=10+10) Unicode 332k ± 0% 332k ± 0% ~ (p=0.447 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% +0.01% (p=0.001 n=10+10) Compiler 4.92M ± 0% 4.92M ± 0% +0.01% (p=0.000 n=10+10) SSA 11.9M ± 0% 11.9M ± 0% +0.02% (p=0.000 n=9+10) Flate 214k ± 0% 214k ± 0% +0.02% (p=0.032 n=10+8) GoParser 270k ± 0% 270k ± 0% +0.02% (p=0.004 n=10+9) Reflect 877k ± 0% 877k ± 0% +0.01% (p=0.000 n=10+10) Tar 313k ± 0% 313k ± 0% ~ (p=0.075 n=9+10) XML 387k ± 0% 387k ± 0% +0.02% (p=0.007 n=10+10) LinkCompiler 455k ± 0% 456k ± 0% +0.08% (p=0.000 n=10+9) ExternalLinkCompiler 670k ± 0% 671k ± 0% +0.06% (p=0.000 n=10+10) LinkWithoutDebugCompiler 113k ± 0% 113k ± 0% ~ (p=0.149 n=10+10) name old maxRSS/op new maxRSS/op delta Template 34.1M ± 1% 34.1M ± 1% ~ (p=0.853 n=10+10) Unicode 35.1M ± 1% 34.6M ± 1% -1.43% (p=0.000 n=10+10) GoTypes 72.8M ± 3% 73.3M ± 2% ~ (p=0.724 n=10+10) Compiler 288M ± 3% 295M ± 4% ~ (p=0.393 n=10+10) SSA 630M ± 1% 622M ± 1% -1.18% (p=0.001 n=10+10) Flate 26.0M ± 1% 26.2M ± 2% ~ (p=0.493 n=10+10) GoParser 28.6M ± 1% 28.5M ± 2% ~ (p=0.256 n=10+10) Reflect 55.5M ± 2% 55.4M ± 1% ~ (p=0.436 n=10+10) Tar 33.0M ± 1% 32.8M ± 2% ~ (p=0.075 n=10+10) XML 38.7M ± 1% 39.0M ± 1% ~ (p=0.053 n=9+10) LinkCompiler 164M ± 1% 164M ± 1% -0.27% (p=0.029 n=10+10) ExternalLinkCompiler 174M ± 0% 173M ± 0% -0.33% (p=0.002 n=9+10) LinkWithoutDebugCompiler 137M ± 0% 136M ± 2% ~ (p=0.825 n=9+10) Change-Id: I9ecf2a10024513abef8fbfbe519e44e0b29b6167 Reviewed-on: https://go-review.googlesource.com/c/go/+/242258 Trust: Martin Möhrmann <moehrmann@google.com> Trust: Michael Knyszek <mknyszek@google.com> Run-TryBot: Martin Möhrmann <martisch@uos.de> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Keith Randall <khr@golang.org>
2020-07-13 18:12:20 +02:00
_NumSizeClasses = 68
_PageShift = 13
runtime: implement Pinner API for object pinning Some C APIs require the use or structures that contain pointers to buffers (iovec, io_uring, ...). The pointer passing rules would require that these buffers are allocated in C memory and to process this data with Go libraries it would need to be copied. In order to provide a zero-copy way to use these C APIs, this CL implements a Pinner API that allows to pin Go objects, which guarantees that the garbage collector does not move these objects while pinned. This allows to relax the pointer passing rules so that pinned pointers can be stored in C allocated memory or can be contained in Go memory that is passed to C functions. The Pin() method accepts pointers to objects of any type and unsafe.Pointer. Slices and arrays can be pinned by calling Pin() with the pointer to the first element. Pinning of maps is not supported. If the GC collects unreachable Pinner holding pinned objects it panics. If Pin() is called with the other non-pointer types it panics as well. Performance considerations: This change has no impact on execution time on existing code, because checks are only done in code paths, that would panic otherwise. The memory footprint on existing code is one pointer per memory span. Fixes: #46787 Signed-off-by: Sven Anderson <sven@anderson.de> Change-Id: I110031fe789b92277ae45a9455624687bd1c54f2 Reviewed-on: https://go-review.googlesource.com/c/go/+/367296 Auto-Submit: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Than McIntosh <thanm@google.com> Run-TryBot: Michael Knyszek <mknyszek@google.com>
2021-11-28 13:05:16 +09:00
maxObjsPerSpan = 1024
)
runtime: add 24 byte allocation size class This CL introduces a 24 byte allocation size class which fits 3 pointers on 64 bit and 6 pointers on 32 bit architectures. Notably this new size class fits a slice header on 64 bit architectures exactly while previously a 32 byte size class would have been used for allocating a slice header on the heap. The main complexity added with this CL is that heapBitsSetType needs to handle objects that aren't 16-byte aligned but contain more than a single pointer on 64-bit architectures. Due to having a non 16 byte aligned size class on 32 bit a h.shift of 2 is now possible which means a heap bitmap byte might only be partially written. Due to this already having been possible on 64 bit before the heap bitmap code only needed minor adjustments for 32 bit doublecheck code paths. Note that this CL changes the slice capacity allocated by append for slice growth to a target capacity of 17 to 24 bytes. On 64 bit architectures the capacity of the slice returned by append([]byte{}, make([]byte, 24)...)) is 32 bytes before and 24 bytes after this CL. Depending on allocation patterns of the specific Go program this can increase the number of total alloctions as subsequent appends to the slice can trigger slice growth earlier than before. On the other side if the slice is never appended to again above its capacity this will lower heap usage by 8 bytes. This CL changes the set of size classes reported in the runtime.MemStats.BySize array due to it being limited to a total of 61 size classes. The new 24 byte size class is now included and the 20480 byte size class is not included anymore. Fixes #8885 name old time/op new time/op delta Template 196ms ± 3% 194ms ± 2% ~ (p=0.247 n=10+10) Unicode 85.6ms ±16% 88.1ms ± 1% ~ (p=0.165 n=10+10) GoTypes 673ms ± 2% 668ms ± 2% ~ (p=0.258 n=9+9) Compiler 3.14s ± 6% 3.08s ± 1% ~ (p=0.243 n=10+9) SSA 6.82s ± 1% 6.76s ± 1% -0.87% (p=0.006 n=9+10) Flate 128ms ± 7% 127ms ± 3% ~ (p=0.739 n=10+10) GoParser 154ms ± 3% 153ms ± 4% ~ (p=0.730 n=9+9) Reflect 404ms ± 1% 412ms ± 4% +1.99% (p=0.022 n=9+10) Tar 172ms ± 4% 170ms ± 4% ~ (p=0.065 n=10+9) XML 231ms ± 4% 230ms ± 3% ~ (p=0.912 n=10+10) LinkCompiler 341ms ± 1% 339ms ± 1% ~ (p=0.243 n=9+10) ExternalLinkCompiler 1.72s ± 1% 1.72s ± 1% ~ (p=0.661 n=9+10) LinkWithoutDebugCompiler 221ms ± 2% 221ms ± 2% ~ (p=0.529 n=10+10) StdCmd 18.4s ± 3% 18.2s ± 1% ~ (p=0.515 n=10+8) name old user-time/op new user-time/op delta Template 238ms ± 4% 243ms ± 6% ~ (p=0.661 n=9+10) Unicode 116ms ± 6% 113ms ± 3% -3.37% (p=0.035 n=9+10) GoTypes 854ms ± 2% 848ms ± 2% ~ (p=0.604 n=9+10) Compiler 4.10s ± 1% 4.11s ± 1% ~ (p=0.481 n=8+9) SSA 9.49s ± 1% 9.41s ± 1% -0.92% (p=0.001 n=9+10) Flate 149ms ± 6% 151ms ± 7% ~ (p=0.481 n=10+10) GoParser 189ms ± 2% 190ms ± 2% ~ (p=0.497 n=9+10) Reflect 511ms ± 2% 508ms ± 2% ~ (p=0.211 n=9+10) Tar 215ms ± 4% 212ms ± 3% ~ (p=0.105 n=10+10) XML 288ms ± 2% 288ms ± 2% ~ (p=0.971 n=10+10) LinkCompiler 559ms ± 4% 557ms ± 1% ~ (p=0.968 n=9+10) ExternalLinkCompiler 1.78s ± 1% 1.77s ± 1% ~ (p=0.055 n=8+10) LinkWithoutDebugCompiler 245ms ± 3% 245ms ± 2% ~ (p=0.684 n=10+10) name old alloc/op new alloc/op delta Template 34.8MB ± 0% 34.4MB ± 0% -0.95% (p=0.000 n=9+10) Unicode 28.6MB ± 0% 28.3MB ± 0% -0.95% (p=0.000 n=10+10) GoTypes 115MB ± 0% 114MB ± 0% -1.02% (p=0.000 n=10+9) Compiler 554MB ± 0% 549MB ± 0% -0.86% (p=0.000 n=9+10) SSA 1.28GB ± 0% 1.27GB ± 0% -0.83% (p=0.000 n=10+10) Flate 21.8MB ± 0% 21.6MB ± 0% -0.87% (p=0.000 n=8+10) GoParser 26.7MB ± 0% 26.4MB ± 0% -0.97% (p=0.000 n=10+9) Reflect 75.0MB ± 0% 74.1MB ± 0% -1.18% (p=0.000 n=10+10) Tar 32.6MB ± 0% 32.3MB ± 0% -0.94% (p=0.000 n=10+7) XML 41.5MB ± 0% 41.2MB ± 0% -0.90% (p=0.000 n=10+8) LinkCompiler 105MB ± 0% 104MB ± 0% -0.94% (p=0.000 n=10+10) ExternalLinkCompiler 153MB ± 0% 152MB ± 0% -0.69% (p=0.000 n=10+10) LinkWithoutDebugCompiler 63.7MB ± 0% 63.6MB ± 0% -0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 336k ± 0% 336k ± 0% +0.02% (p=0.002 n=10+10) Unicode 332k ± 0% 332k ± 0% ~ (p=0.447 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% +0.01% (p=0.001 n=10+10) Compiler 4.92M ± 0% 4.92M ± 0% +0.01% (p=0.000 n=10+10) SSA 11.9M ± 0% 11.9M ± 0% +0.02% (p=0.000 n=9+10) Flate 214k ± 0% 214k ± 0% +0.02% (p=0.032 n=10+8) GoParser 270k ± 0% 270k ± 0% +0.02% (p=0.004 n=10+9) Reflect 877k ± 0% 877k ± 0% +0.01% (p=0.000 n=10+10) Tar 313k ± 0% 313k ± 0% ~ (p=0.075 n=9+10) XML 387k ± 0% 387k ± 0% +0.02% (p=0.007 n=10+10) LinkCompiler 455k ± 0% 456k ± 0% +0.08% (p=0.000 n=10+9) ExternalLinkCompiler 670k ± 0% 671k ± 0% +0.06% (p=0.000 n=10+10) LinkWithoutDebugCompiler 113k ± 0% 113k ± 0% ~ (p=0.149 n=10+10) name old maxRSS/op new maxRSS/op delta Template 34.1M ± 1% 34.1M ± 1% ~ (p=0.853 n=10+10) Unicode 35.1M ± 1% 34.6M ± 1% -1.43% (p=0.000 n=10+10) GoTypes 72.8M ± 3% 73.3M ± 2% ~ (p=0.724 n=10+10) Compiler 288M ± 3% 295M ± 4% ~ (p=0.393 n=10+10) SSA 630M ± 1% 622M ± 1% -1.18% (p=0.001 n=10+10) Flate 26.0M ± 1% 26.2M ± 2% ~ (p=0.493 n=10+10) GoParser 28.6M ± 1% 28.5M ± 2% ~ (p=0.256 n=10+10) Reflect 55.5M ± 2% 55.4M ± 1% ~ (p=0.436 n=10+10) Tar 33.0M ± 1% 32.8M ± 2% ~ (p=0.075 n=10+10) XML 38.7M ± 1% 39.0M ± 1% ~ (p=0.053 n=9+10) LinkCompiler 164M ± 1% 164M ± 1% -0.27% (p=0.029 n=10+10) ExternalLinkCompiler 174M ± 0% 173M ± 0% -0.33% (p=0.002 n=9+10) LinkWithoutDebugCompiler 137M ± 0% 136M ± 2% ~ (p=0.825 n=9+10) Change-Id: I9ecf2a10024513abef8fbfbe519e44e0b29b6167 Reviewed-on: https://go-review.googlesource.com/c/go/+/242258 Trust: Martin Möhrmann <moehrmann@google.com> Trust: Michael Knyszek <mknyszek@google.com> Run-TryBot: Martin Möhrmann <martisch@uos.de> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Keith Randall <khr@golang.org>
2020-07-13 18:12:20 +02:00
var class_to_size = [_NumSizeClasses]uint16{0, 8, 16, 24, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576, 27264, 28672, 32768}
var class_to_allocnpages = [_NumSizeClasses]uint8{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 3, 1, 3, 2, 3, 4, 5, 6, 1, 7, 6, 5, 4, 3, 5, 7, 2, 9, 7, 5, 8, 3, 10, 7, 4}
runtime: simplify divmagic for span calculations It's both simpler and faster to just unconditionally do two 32-bit multiplies rather than a bunch of branching to try to avoid them. This is safe thanks to the tight bounds derived in [1] and verified during mksizeclasses.go. Benchstat results below for compilebench benchmarks on my P920. See also [2] for micro benchmarks comparing the new functions against the originals (as well as several basic attempts at optimizing them). name old time/op new time/op delta Template 295ms ± 3% 290ms ± 1% -1.95% (p=0.000 n=20+20) Unicode 113ms ± 3% 110ms ± 2% -2.32% (p=0.000 n=21+17) GoTypes 1.78s ± 1% 1.76s ± 1% -1.23% (p=0.000 n=21+20) Compiler 119ms ± 2% 117ms ± 4% -1.53% (p=0.007 n=20+20) SSA 14.3s ± 1% 13.8s ± 1% -3.12% (p=0.000 n=17+20) Flate 173ms ± 2% 170ms ± 1% -1.64% (p=0.000 n=20+19) GoParser 278ms ± 2% 273ms ± 2% -1.92% (p=0.000 n=20+19) Reflect 686ms ± 3% 671ms ± 3% -2.18% (p=0.000 n=19+20) Tar 255ms ± 2% 248ms ± 2% -2.90% (p=0.000 n=20+20) XML 335ms ± 3% 327ms ± 2% -2.34% (p=0.000 n=20+20) LinkCompiler 799ms ± 1% 799ms ± 1% ~ (p=0.925 n=20+20) ExternalLinkCompiler 1.90s ± 1% 1.90s ± 0% ~ (p=0.327 n=20+20) LinkWithoutDebugCompiler 385ms ± 1% 386ms ± 1% ~ (p=0.251 n=18+20) [Geo mean] 512ms 504ms -1.61% name old user-time/op new user-time/op delta Template 286ms ± 4% 282ms ± 4% -1.42% (p=0.025 n=21+20) Unicode 104ms ± 9% 102ms ±14% ~ (p=0.294 n=21+20) GoTypes 1.75s ± 3% 1.72s ± 2% -1.36% (p=0.000 n=21+20) Compiler 109ms ±11% 108ms ± 8% ~ (p=0.187 n=21+19) SSA 14.0s ± 1% 13.5s ± 2% -3.25% (p=0.000 n=16+20) Flate 166ms ± 4% 164ms ± 4% -1.34% (p=0.032 n=19+19) GoParser 268ms ± 4% 263ms ± 4% -1.71% (p=0.011 n=18+20) Reflect 666ms ± 3% 654ms ± 4% -1.77% (p=0.002 n=18+20) Tar 245ms ± 5% 236ms ± 6% -3.34% (p=0.000 n=20+20) XML 320ms ± 4% 314ms ± 3% -2.01% (p=0.001 n=19+18) LinkCompiler 744ms ± 4% 747ms ± 3% ~ (p=0.627 n=20+19) ExternalLinkCompiler 1.71s ± 3% 1.72s ± 2% ~ (p=0.149 n=20+20) LinkWithoutDebugCompiler 345ms ± 6% 342ms ± 8% ~ (p=0.355 n=20+20) [Geo mean] 484ms 477ms -1.50% [1] Daniel Lemire, Owen Kaser, Nathan Kurz. 2019. "Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries." https://arxiv.org/abs/1902.01961 [2] https://github.com/mdempsky/benchdivmagic Change-Id: Ie4d214e7a908b0d979c878f2d404bd56bdf374f6 Reviewed-on: https://go-review.googlesource.com/c/go/+/300994 Run-TryBot: Matthew Dempsky <mdempsky@google.com> Trust: Matthew Dempsky <mdempsky@google.com> Trust: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
2021-03-11 15:45:52 -08:00
var class_to_divmagic = [_NumSizeClasses]uint32{0, ^uint32(0)/8 + 1, ^uint32(0)/16 + 1, ^uint32(0)/24 + 1, ^uint32(0)/32 + 1, ^uint32(0)/48 + 1, ^uint32(0)/64 + 1, ^uint32(0)/80 + 1, ^uint32(0)/96 + 1, ^uint32(0)/112 + 1, ^uint32(0)/128 + 1, ^uint32(0)/144 + 1, ^uint32(0)/160 + 1, ^uint32(0)/176 + 1, ^uint32(0)/192 + 1, ^uint32(0)/208 + 1, ^uint32(0)/224 + 1, ^uint32(0)/240 + 1, ^uint32(0)/256 + 1, ^uint32(0)/288 + 1, ^uint32(0)/320 + 1, ^uint32(0)/352 + 1, ^uint32(0)/384 + 1, ^uint32(0)/416 + 1, ^uint32(0)/448 + 1, ^uint32(0)/480 + 1, ^uint32(0)/512 + 1, ^uint32(0)/576 + 1, ^uint32(0)/640 + 1, ^uint32(0)/704 + 1, ^uint32(0)/768 + 1, ^uint32(0)/896 + 1, ^uint32(0)/1024 + 1, ^uint32(0)/1152 + 1, ^uint32(0)/1280 + 1, ^uint32(0)/1408 + 1, ^uint32(0)/1536 + 1, ^uint32(0)/1792 + 1, ^uint32(0)/2048 + 1, ^uint32(0)/2304 + 1, ^uint32(0)/2688 + 1, ^uint32(0)/3072 + 1, ^uint32(0)/3200 + 1, ^uint32(0)/3456 + 1, ^uint32(0)/4096 + 1, ^uint32(0)/4864 + 1, ^uint32(0)/5376 + 1, ^uint32(0)/6144 + 1, ^uint32(0)/6528 + 1, ^uint32(0)/6784 + 1, ^uint32(0)/6912 + 1, ^uint32(0)/8192 + 1, ^uint32(0)/9472 + 1, ^uint32(0)/9728 + 1, ^uint32(0)/10240 + 1, ^uint32(0)/10880 + 1, ^uint32(0)/12288 + 1, ^uint32(0)/13568 + 1, ^uint32(0)/14336 + 1, ^uint32(0)/16384 + 1, ^uint32(0)/18432 + 1, ^uint32(0)/19072 + 1, ^uint32(0)/20480 + 1, ^uint32(0)/21760 + 1, ^uint32(0)/24576 + 1, ^uint32(0)/27264 + 1, ^uint32(0)/28672 + 1, ^uint32(0)/32768 + 1}
runtime: add 24 byte allocation size class This CL introduces a 24 byte allocation size class which fits 3 pointers on 64 bit and 6 pointers on 32 bit architectures. Notably this new size class fits a slice header on 64 bit architectures exactly while previously a 32 byte size class would have been used for allocating a slice header on the heap. The main complexity added with this CL is that heapBitsSetType needs to handle objects that aren't 16-byte aligned but contain more than a single pointer on 64-bit architectures. Due to having a non 16 byte aligned size class on 32 bit a h.shift of 2 is now possible which means a heap bitmap byte might only be partially written. Due to this already having been possible on 64 bit before the heap bitmap code only needed minor adjustments for 32 bit doublecheck code paths. Note that this CL changes the slice capacity allocated by append for slice growth to a target capacity of 17 to 24 bytes. On 64 bit architectures the capacity of the slice returned by append([]byte{}, make([]byte, 24)...)) is 32 bytes before and 24 bytes after this CL. Depending on allocation patterns of the specific Go program this can increase the number of total alloctions as subsequent appends to the slice can trigger slice growth earlier than before. On the other side if the slice is never appended to again above its capacity this will lower heap usage by 8 bytes. This CL changes the set of size classes reported in the runtime.MemStats.BySize array due to it being limited to a total of 61 size classes. The new 24 byte size class is now included and the 20480 byte size class is not included anymore. Fixes #8885 name old time/op new time/op delta Template 196ms ± 3% 194ms ± 2% ~ (p=0.247 n=10+10) Unicode 85.6ms ±16% 88.1ms ± 1% ~ (p=0.165 n=10+10) GoTypes 673ms ± 2% 668ms ± 2% ~ (p=0.258 n=9+9) Compiler 3.14s ± 6% 3.08s ± 1% ~ (p=0.243 n=10+9) SSA 6.82s ± 1% 6.76s ± 1% -0.87% (p=0.006 n=9+10) Flate 128ms ± 7% 127ms ± 3% ~ (p=0.739 n=10+10) GoParser 154ms ± 3% 153ms ± 4% ~ (p=0.730 n=9+9) Reflect 404ms ± 1% 412ms ± 4% +1.99% (p=0.022 n=9+10) Tar 172ms ± 4% 170ms ± 4% ~ (p=0.065 n=10+9) XML 231ms ± 4% 230ms ± 3% ~ (p=0.912 n=10+10) LinkCompiler 341ms ± 1% 339ms ± 1% ~ (p=0.243 n=9+10) ExternalLinkCompiler 1.72s ± 1% 1.72s ± 1% ~ (p=0.661 n=9+10) LinkWithoutDebugCompiler 221ms ± 2% 221ms ± 2% ~ (p=0.529 n=10+10) StdCmd 18.4s ± 3% 18.2s ± 1% ~ (p=0.515 n=10+8) name old user-time/op new user-time/op delta Template 238ms ± 4% 243ms ± 6% ~ (p=0.661 n=9+10) Unicode 116ms ± 6% 113ms ± 3% -3.37% (p=0.035 n=9+10) GoTypes 854ms ± 2% 848ms ± 2% ~ (p=0.604 n=9+10) Compiler 4.10s ± 1% 4.11s ± 1% ~ (p=0.481 n=8+9) SSA 9.49s ± 1% 9.41s ± 1% -0.92% (p=0.001 n=9+10) Flate 149ms ± 6% 151ms ± 7% ~ (p=0.481 n=10+10) GoParser 189ms ± 2% 190ms ± 2% ~ (p=0.497 n=9+10) Reflect 511ms ± 2% 508ms ± 2% ~ (p=0.211 n=9+10) Tar 215ms ± 4% 212ms ± 3% ~ (p=0.105 n=10+10) XML 288ms ± 2% 288ms ± 2% ~ (p=0.971 n=10+10) LinkCompiler 559ms ± 4% 557ms ± 1% ~ (p=0.968 n=9+10) ExternalLinkCompiler 1.78s ± 1% 1.77s ± 1% ~ (p=0.055 n=8+10) LinkWithoutDebugCompiler 245ms ± 3% 245ms ± 2% ~ (p=0.684 n=10+10) name old alloc/op new alloc/op delta Template 34.8MB ± 0% 34.4MB ± 0% -0.95% (p=0.000 n=9+10) Unicode 28.6MB ± 0% 28.3MB ± 0% -0.95% (p=0.000 n=10+10) GoTypes 115MB ± 0% 114MB ± 0% -1.02% (p=0.000 n=10+9) Compiler 554MB ± 0% 549MB ± 0% -0.86% (p=0.000 n=9+10) SSA 1.28GB ± 0% 1.27GB ± 0% -0.83% (p=0.000 n=10+10) Flate 21.8MB ± 0% 21.6MB ± 0% -0.87% (p=0.000 n=8+10) GoParser 26.7MB ± 0% 26.4MB ± 0% -0.97% (p=0.000 n=10+9) Reflect 75.0MB ± 0% 74.1MB ± 0% -1.18% (p=0.000 n=10+10) Tar 32.6MB ± 0% 32.3MB ± 0% -0.94% (p=0.000 n=10+7) XML 41.5MB ± 0% 41.2MB ± 0% -0.90% (p=0.000 n=10+8) LinkCompiler 105MB ± 0% 104MB ± 0% -0.94% (p=0.000 n=10+10) ExternalLinkCompiler 153MB ± 0% 152MB ± 0% -0.69% (p=0.000 n=10+10) LinkWithoutDebugCompiler 63.7MB ± 0% 63.6MB ± 0% -0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 336k ± 0% 336k ± 0% +0.02% (p=0.002 n=10+10) Unicode 332k ± 0% 332k ± 0% ~ (p=0.447 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% +0.01% (p=0.001 n=10+10) Compiler 4.92M ± 0% 4.92M ± 0% +0.01% (p=0.000 n=10+10) SSA 11.9M ± 0% 11.9M ± 0% +0.02% (p=0.000 n=9+10) Flate 214k ± 0% 214k ± 0% +0.02% (p=0.032 n=10+8) GoParser 270k ± 0% 270k ± 0% +0.02% (p=0.004 n=10+9) Reflect 877k ± 0% 877k ± 0% +0.01% (p=0.000 n=10+10) Tar 313k ± 0% 313k ± 0% ~ (p=0.075 n=9+10) XML 387k ± 0% 387k ± 0% +0.02% (p=0.007 n=10+10) LinkCompiler 455k ± 0% 456k ± 0% +0.08% (p=0.000 n=10+9) ExternalLinkCompiler 670k ± 0% 671k ± 0% +0.06% (p=0.000 n=10+10) LinkWithoutDebugCompiler 113k ± 0% 113k ± 0% ~ (p=0.149 n=10+10) name old maxRSS/op new maxRSS/op delta Template 34.1M ± 1% 34.1M ± 1% ~ (p=0.853 n=10+10) Unicode 35.1M ± 1% 34.6M ± 1% -1.43% (p=0.000 n=10+10) GoTypes 72.8M ± 3% 73.3M ± 2% ~ (p=0.724 n=10+10) Compiler 288M ± 3% 295M ± 4% ~ (p=0.393 n=10+10) SSA 630M ± 1% 622M ± 1% -1.18% (p=0.001 n=10+10) Flate 26.0M ± 1% 26.2M ± 2% ~ (p=0.493 n=10+10) GoParser 28.6M ± 1% 28.5M ± 2% ~ (p=0.256 n=10+10) Reflect 55.5M ± 2% 55.4M ± 1% ~ (p=0.436 n=10+10) Tar 33.0M ± 1% 32.8M ± 2% ~ (p=0.075 n=10+10) XML 38.7M ± 1% 39.0M ± 1% ~ (p=0.053 n=9+10) LinkCompiler 164M ± 1% 164M ± 1% -0.27% (p=0.029 n=10+10) ExternalLinkCompiler 174M ± 0% 173M ± 0% -0.33% (p=0.002 n=9+10) LinkWithoutDebugCompiler 137M ± 0% 136M ± 2% ~ (p=0.825 n=9+10) Change-Id: I9ecf2a10024513abef8fbfbe519e44e0b29b6167 Reviewed-on: https://go-review.googlesource.com/c/go/+/242258 Trust: Martin Möhrmann <moehrmann@google.com> Trust: Michael Knyszek <mknyszek@google.com> Run-TryBot: Martin Möhrmann <martisch@uos.de> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Keith Randall <khr@golang.org>
2020-07-13 18:12:20 +02:00
var size_to_class8 = [smallSizeMax/smallSizeDiv + 1]uint8{0, 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}
var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8{32, 33, 34, 35, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 41, 41, 41, 42, 43, 43, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67}