go/src/cmd/compile/internal/gc/builtin/runtime.go

198 lines
7 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// NOTE: If you change this file you must run "go generate"
// to update builtin.go. This is not done automatically
// to avoid depending on having a working compiler binary.
// +build ignore
package runtime
// emitted by compiler, not referred to by go programs
import "unsafe"
func newobject(typ *byte) *any
func panicindex()
func panicslice()
func panicdivide()
func throwinit()
cmd/compile: make panicwrap argument-free When code defines a method on T, the compiler generates a corresponding wrapper method on *T. The first thing the wrapper does is check whether the pointer is nil and if so, call panicwrap. This is done to provide a useful error message. The existing implementation gets its information from arguments set up by the compiler. However, with some trouble, this information can be extracted from the name of the wrapper method itself. Removing the arguments to panicwrap simplifies and shrinks the wrapper method. It also means that the call to panicwrap does not require any stack space. This enables a further optimization on amd64/x86, which is to skip the function prologue if nothing else in the method requires stack space. This is frequently the case in simple, hot methods, such as Less and Swap in sort.Interface implementations. Fixes #19040. Benchmarks for package sort on amd64: name old time/op new time/op delta SearchWrappers-8 104ns ± 1% 104ns ± 1% ~ (p=0.286 n=27+27) SortString1K-8 128µs ± 1% 128µs ± 1% -0.44% (p=0.004 n=30+30) SortString1K_Slice-8 118µs ± 2% 117µs ± 1% ~ (p=0.106 n=30+30) StableString1K-8 18.6µs ± 1% 18.6µs ± 1% ~ (p=0.446 n=28+26) SortInt1K-8 65.9µs ± 1% 60.7µs ± 1% -7.96% (p=0.000 n=28+30) StableInt1K-8 75.3µs ± 2% 72.8µs ± 1% -3.41% (p=0.000 n=30+30) StableInt1K_Slice-8 57.7µs ± 1% 57.7µs ± 1% ~ (p=0.515 n=30+30) SortInt64K-8 6.28ms ± 1% 6.01ms ± 1% -4.19% (p=0.000 n=28+28) SortInt64K_Slice-8 5.04ms ± 1% 5.04ms ± 1% ~ (p=0.927 n=28+27) StableInt64K-8 6.65ms ± 1% 6.38ms ± 1% -3.97% (p=0.000 n=26+30) Sort1e2-8 37.9µs ± 1% 37.2µs ± 1% -1.89% (p=0.000 n=29+27) Stable1e2-8 77.0µs ± 1% 74.7µs ± 1% -3.06% (p=0.000 n=27+30) Sort1e4-8 8.21ms ± 2% 7.98ms ± 1% -2.77% (p=0.000 n=29+30) Stable1e4-8 24.8ms ± 1% 24.3ms ± 1% -2.31% (p=0.000 n=28+30) Sort1e6-8 1.27s ± 4% 1.22s ± 1% -3.42% (p=0.000 n=30+29) Stable1e6-8 5.06s ± 1% 4.92s ± 1% -2.77% (p=0.000 n=25+29) [Geo mean] 731µs 714µs -2.29% Before/after assembly for sort.(*intPairs).Less follows. It can be optimized further, but that's for a follow-up CL. Before: "".(*intPairs).Less t=1 size=214 args=0x20 locals=0x38 0x0000 00000 (<autogenerated>:1) TEXT "".(*intPairs).Less(SB), $56-32 0x0000 00000 (<autogenerated>:1) MOVQ (TLS), CX 0x0009 00009 (<autogenerated>:1) CMPQ SP, 16(CX) 0x000d 00013 (<autogenerated>:1) JLS 204 0x0013 00019 (<autogenerated>:1) SUBQ $56, SP 0x0017 00023 (<autogenerated>:1) MOVQ BP, 48(SP) 0x001c 00028 (<autogenerated>:1) LEAQ 48(SP), BP 0x0021 00033 (<autogenerated>:1) MOVQ 32(CX), BX 0x0025 00037 (<autogenerated>:1) TESTQ BX, BX 0x0028 00040 (<autogenerated>:1) JEQ 55 0x002a 00042 (<autogenerated>:1) LEAQ 64(SP), DI 0x002f 00047 (<autogenerated>:1) CMPQ (BX), DI 0x0032 00050 (<autogenerated>:1) JNE 55 0x0034 00052 (<autogenerated>:1) MOVQ SP, (BX) 0x0037 00055 (<autogenerated>:1) NOP 0x0037 00055 (<autogenerated>:1) FUNCDATA $0, gclocals·4032f753396f2012ad1784f398b170f4(SB) 0x0037 00055 (<autogenerated>:1) FUNCDATA $1, gclocals·69c1753bd5f81501d95132d08af04464(SB) 0x0037 00055 (<autogenerated>:1) MOVQ ""..this+64(FP), AX 0x003c 00060 (<autogenerated>:1) TESTQ AX, AX 0x003f 00063 (<autogenerated>:1) JEQ $0, 135 0x0041 00065 (<autogenerated>:1) MOVQ (AX), CX 0x0044 00068 (<autogenerated>:1) MOVQ 8(AX), AX 0x0048 00072 (<autogenerated>:1) MOVQ "".i+72(FP), DX 0x004d 00077 (<autogenerated>:1) CMPQ DX, AX 0x0050 00080 (<autogenerated>:1) JCC $0, 128 0x0052 00082 (<autogenerated>:1) SHLQ $4, DX 0x0056 00086 (<autogenerated>:1) MOVQ (CX)(DX*1), DX 0x005a 00090 (<autogenerated>:1) MOVQ "".j+80(FP), BX 0x005f 00095 (<autogenerated>:1) CMPQ BX, AX 0x0062 00098 (<autogenerated>:1) JCC $0, 128 0x0064 00100 (<autogenerated>:1) SHLQ $4, BX 0x0068 00104 (<autogenerated>:1) MOVQ (CX)(BX*1), AX 0x006c 00108 (<autogenerated>:1) CMPQ DX, AX 0x006f 00111 (<autogenerated>:1) SETLT AL 0x0072 00114 (<autogenerated>:1) MOVB AL, "".~r2+88(FP) 0x0076 00118 (<autogenerated>:1) MOVQ 48(SP), BP 0x007b 00123 (<autogenerated>:1) ADDQ $56, SP 0x007f 00127 (<autogenerated>:1) RET 0x0080 00128 (<autogenerated>:1) PCDATA $0, $1 0x0080 00128 (<autogenerated>:1) CALL runtime.panicindex(SB) 0x0085 00133 (<autogenerated>:1) UNDEF 0x0087 00135 (<autogenerated>:1) LEAQ go.string."sort_test"(SB), AX 0x008e 00142 (<autogenerated>:1) MOVQ AX, (SP) 0x0092 00146 (<autogenerated>:1) MOVQ $9, 8(SP) 0x009b 00155 (<autogenerated>:1) LEAQ go.string."intPairs"(SB), AX 0x00a2 00162 (<autogenerated>:1) MOVQ AX, 16(SP) 0x00a7 00167 (<autogenerated>:1) MOVQ $8, 24(SP) 0x00b0 00176 (<autogenerated>:1) LEAQ go.string."Less"(SB), AX 0x00b7 00183 (<autogenerated>:1) MOVQ AX, 32(SP) 0x00bc 00188 (<autogenerated>:1) MOVQ $4, 40(SP) 0x00c5 00197 (<autogenerated>:1) PCDATA $0, $1 0x00c5 00197 (<autogenerated>:1) CALL runtime.panicwrap(SB) 0x00ca 00202 (<autogenerated>:1) UNDEF 0x00cc 00204 (<autogenerated>:1) NOP 0x00cc 00204 (<autogenerated>:1) PCDATA $0, $-1 0x00cc 00204 (<autogenerated>:1) CALL runtime.morestack_noctxt(SB) 0x00d1 00209 (<autogenerated>:1) JMP 0 After: "".(*intPairs).Swap t=1 size=147 args=0x18 locals=0x8 0x0000 00000 (<autogenerated>:1) TEXT "".(*intPairs).Swap(SB), $8-24 0x0000 00000 (<autogenerated>:1) MOVQ (TLS), CX 0x0009 00009 (<autogenerated>:1) SUBQ $8, SP 0x000d 00013 (<autogenerated>:1) MOVQ BP, (SP) 0x0011 00017 (<autogenerated>:1) LEAQ (SP), BP 0x0015 00021 (<autogenerated>:1) MOVQ 32(CX), BX 0x0019 00025 (<autogenerated>:1) TESTQ BX, BX 0x001c 00028 (<autogenerated>:1) JEQ 43 0x001e 00030 (<autogenerated>:1) LEAQ 16(SP), DI 0x0023 00035 (<autogenerated>:1) CMPQ (BX), DI 0x0026 00038 (<autogenerated>:1) JNE 43 0x0028 00040 (<autogenerated>:1) MOVQ SP, (BX) 0x002b 00043 (<autogenerated>:1) NOP 0x002b 00043 (<autogenerated>:1) FUNCDATA $0, gclocals·e6397a44f8e1b6e77d0f200b4fba5269(SB) 0x002b 00043 (<autogenerated>:1) FUNCDATA $1, gclocals·69c1753bd5f81501d95132d08af04464(SB) 0x002b 00043 (<autogenerated>:1) MOVQ ""..this+16(FP), AX 0x0030 00048 (<autogenerated>:1) TESTQ AX, AX 0x0033 00051 (<autogenerated>:1) JEQ $0, 140 0x0035 00053 (<autogenerated>:1) MOVQ (AX), CX 0x0038 00056 (<autogenerated>:1) MOVQ 8(AX), AX 0x003c 00060 (<autogenerated>:1) MOVQ "".i+24(FP), DX 0x0041 00065 (<autogenerated>:1) CMPQ DX, AX 0x0044 00068 (<autogenerated>:1) JCC $0, 133 0x0046 00070 (<autogenerated>:1) SHLQ $4, DX 0x004a 00074 (<autogenerated>:1) MOVQ 8(CX)(DX*1), BX 0x004f 00079 (<autogenerated>:1) MOVQ (CX)(DX*1), SI 0x0053 00083 (<autogenerated>:1) MOVQ "".j+32(FP), DI 0x0058 00088 (<autogenerated>:1) CMPQ DI, AX 0x005b 00091 (<autogenerated>:1) JCC $0, 133 0x005d 00093 (<autogenerated>:1) SHLQ $4, DI 0x0061 00097 (<autogenerated>:1) MOVQ 8(CX)(DI*1), AX 0x0066 00102 (<autogenerated>:1) MOVQ (CX)(DI*1), R8 0x006a 00106 (<autogenerated>:1) MOVQ R8, (CX)(DX*1) 0x006e 00110 (<autogenerated>:1) MOVQ AX, 8(CX)(DX*1) 0x0073 00115 (<autogenerated>:1) MOVQ SI, (CX)(DI*1) 0x0077 00119 (<autogenerated>:1) MOVQ BX, 8(CX)(DI*1) 0x007c 00124 (<autogenerated>:1) MOVQ (SP), BP 0x0080 00128 (<autogenerated>:1) ADDQ $8, SP 0x0084 00132 (<autogenerated>:1) RET 0x0085 00133 (<autogenerated>:1) PCDATA $0, $1 0x0085 00133 (<autogenerated>:1) CALL runtime.panicindex(SB) 0x008a 00138 (<autogenerated>:1) UNDEF 0x008c 00140 (<autogenerated>:1) PCDATA $0, $1 0x008c 00140 (<autogenerated>:1) CALL runtime.panicwrap(SB) 0x0091 00145 (<autogenerated>:1) UNDEF Change-Id: I15bb8435f0690badb868799f313ed8817335efd3 Reviewed-on: https://go-review.googlesource.com/36809 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2017-02-10 21:49:07 -08:00
func panicwrap()
func gopanic(interface{})
func gorecover(*int32) interface{}
func goschedguarded()
func printbool(bool)
func printfloat(float64)
func printint(int64)
func printhex(uint64)
func printuint(uint64)
func printcomplex(complex128)
func printstring(string)
func printpointer(any)
func printiface(any)
func printeface(any)
func printslice(any)
func printnl()
func printsp()
func printlock()
func printunlock()
func concatstring2(*[32]byte, string, string) string
func concatstring3(*[32]byte, string, string, string) string
func concatstring4(*[32]byte, string, string, string, string) string
func concatstring5(*[32]byte, string, string, string, string, string) string
func concatstrings(*[32]byte, []string) string
func cmpstring(string, string) int
func intstring(*[4]byte, int64) string
func slicebytetostring(*[32]byte, []byte) string
func slicebytetostringtmp([]byte) string
func slicerunetostring(*[32]byte, []rune) string
func stringtoslicebyte(*[32]byte, string) []byte
func stringtoslicerune(*[32]rune, string) []rune
func decoderune(string, int) (retv rune, retk int)
func slicecopy(to any, fr any, wid uintptr) int
func slicestringcopy(to any, fr any) int
// interface conversions
func convI2I(typ *byte, elem any) (ret any)
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
func convT2E(typ *byte, elem *any) (ret any)
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
func convT2E16(typ *byte, elem *any) (ret any)
func convT2E32(typ *byte, elem *any) (ret any)
func convT2E64(typ *byte, elem *any) (ret any)
func convT2Estring(typ *byte, elem *any) (ret any)
func convT2Eslice(typ *byte, elem *any) (ret any)
func convT2Enoptr(typ *byte, elem *any) (ret any)
func convT2I(tab *byte, elem *any) (ret any)
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals Prior to this CL, all runtime conversions from a concrete value to an interface went through one of two runtime calls: convT2E or convT2I. However, in practice, basic types are very common. Specializing convT2x for those basic types allows for a more efficient implementation for those types. For basic scalars and strings, allocation and copying can use the same methods as normal code. For pointer-free types, allocation can occur without zeroing, and copying can take place without GC calls. For slices, copying is cheaper and simpler. This CL adds twelve runtime routines: convT2E16, convT2I16 convT2E32, convT2I32 convT2E64, convT2I64 convT2Estring, convT2Istring convT2Eslice, convT2Islice convT2Enoptr, convT2Inoptr While compiling make.bash, 93% of all convT2x calls are now to one of these specialized convT2x call. Within specialized convT2x routines, it is cheap to check for a zero value, in a way that it is not in general. When we detect a zero value there, we return a pointer to zeroVal, rather than allocating. name old time/op new time/op delta ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56) ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60) ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57) ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60) ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59) ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57) ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60) ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57) ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58) ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55) ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52) ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59) Benchmarks on a real program (the compiler): name old time/op new time/op delta Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26) Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26) GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30) Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27) Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28) GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30) Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30) Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30) XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29) name old user-ns/op new user-ns/op delta Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28) Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27) GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30) Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28) Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29) GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30) Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30) Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30) XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29) Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30) GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30) Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29) Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30) GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28) Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30) Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28) XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30) name old allocs/op new allocs/op delta Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30) Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30) GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30) Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29) Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30) GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28) Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30) Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29) XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30) Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f Reviewed-on: https://go-review.googlesource.com/36476 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-02-03 22:40:56 -08:00
func convT2I16(tab *byte, elem *any) (ret any)
func convT2I32(tab *byte, elem *any) (ret any)
func convT2I64(tab *byte, elem *any) (ret any)
func convT2Istring(tab *byte, elem *any) (ret any)
func convT2Islice(tab *byte, elem *any) (ret any)
func convT2Inoptr(tab *byte, elem *any) (ret any)
// interface type assertions x.(T)
func assertE2I(typ *byte, iface any) (ret any)
func assertE2I2(typ *byte, iface any) (ret any, b bool)
func assertI2I(typ *byte, iface any) (ret any)
func assertI2I2(typ *byte, iface any) (ret any, b bool)
func panicdottypeE(have, want, iface *byte)
func panicdottypeI(have, want, iface *byte)
func panicnildottype(want *byte)
// interface equality. Type/itab pointers are already known to be equal, so
// we only need to pass one.
func ifaceeq(tab *uintptr, x, y unsafe.Pointer) (ret bool)
func efaceeq(typ *uintptr, x, y unsafe.Pointer) (ret bool)
func fastrand() uint32
// *byte is really *runtime.Type
func makemap64(mapType *byte, hint int64, mapbuf *any) (hmap map[any]any)
func makemap(mapType *byte, hint int, mapbuf *any) (hmap map[any]any)
func makemap_small() (hmap map[any]any)
func mapaccess1(mapType *byte, hmap map[any]any, key *any) (val *any)
func mapaccess1_fast32(mapType *byte, hmap map[any]any, key any) (val *any)
func mapaccess1_fast64(mapType *byte, hmap map[any]any, key any) (val *any)
func mapaccess1_faststr(mapType *byte, hmap map[any]any, key any) (val *any)
func mapaccess1_fat(mapType *byte, hmap map[any]any, key *any, zero *byte) (val *any)
func mapaccess2(mapType *byte, hmap map[any]any, key *any) (val *any, pres bool)
func mapaccess2_fast32(mapType *byte, hmap map[any]any, key any) (val *any, pres bool)
func mapaccess2_fast64(mapType *byte, hmap map[any]any, key any) (val *any, pres bool)
func mapaccess2_faststr(mapType *byte, hmap map[any]any, key any) (val *any, pres bool)
func mapaccess2_fat(mapType *byte, hmap map[any]any, key *any, zero *byte) (val *any, pres bool)
func mapassign(mapType *byte, hmap map[any]any, key *any) (val *any)
runtime: add mapassign_fast* Add benchmarks for map assignment with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapAssignInt32_255-8 24.7ns ± 3% 17.4ns ± 2% -29.75% (p=0.000 n=10+10) MapAssignInt32_64k-8 45.5ns ± 4% 37.6ns ± 4% -17.18% (p=0.000 n=10+10) MapAssignInt64_255-8 26.0ns ± 3% 17.9ns ± 4% -31.03% (p=0.000 n=10+10) MapAssignInt64_64k-8 46.9ns ± 5% 38.7ns ± 2% -17.53% (p=0.000 n=9+10) MapAssignStr_255-8 47.8ns ± 3% 24.8ns ± 4% -48.01% (p=0.000 n=10+10) MapAssignStr_64k-8 83.0ns ± 3% 51.9ns ± 3% -37.45% (p=0.000 n=10+9) name old time/op new time/op delta BinaryTree17-8 3.11s ±19% 2.78s ± 3% ~ (p=0.095 n=5+5) Fannkuch11-8 3.26s ± 1% 3.21s ± 2% ~ (p=0.056 n=5+5) FmtFprintfEmpty-8 50.3ns ± 1% 50.8ns ± 2% ~ (p=0.246 n=5+5) FmtFprintfString-8 82.7ns ± 4% 80.1ns ± 5% ~ (p=0.238 n=5+5) FmtFprintfInt-8 82.6ns ± 2% 81.9ns ± 3% ~ (p=0.508 n=5+5) FmtFprintfIntInt-8 124ns ± 4% 121ns ± 3% ~ (p=0.111 n=5+5) FmtFprintfPrefixedInt-8 158ns ± 6% 160ns ± 2% ~ (p=0.341 n=5+5) FmtFprintfFloat-8 249ns ± 2% 245ns ± 2% ~ (p=0.095 n=5+5) FmtManyArgs-8 513ns ± 2% 519ns ± 3% ~ (p=0.151 n=5+5) GobDecode-8 7.48ms ±12% 7.11ms ± 2% ~ (p=0.222 n=5+5) GobEncode-8 6.25ms ± 1% 6.03ms ± 2% -3.56% (p=0.008 n=5+5) Gzip-8 252ms ± 4% 252ms ± 4% ~ (p=1.000 n=5+5) Gunzip-8 38.4ms ± 3% 38.6ms ± 2% ~ (p=0.690 n=5+5) HTTPClientServer-8 76.9µs ±41% 66.4µs ± 6% ~ (p=0.310 n=5+5) JSONEncode-8 16.5ms ± 3% 16.7ms ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 54.6ms ± 1% 54.3ms ± 2% ~ (p=0.548 n=5+5) Mandelbrot200-8 4.45ms ± 3% 4.47ms ± 1% ~ (p=0.841 n=5+5) GoParse-8 3.43ms ± 1% 3.32ms ± 2% -3.28% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 88.2ns ± 3% 89.4ns ± 2% ~ (p=0.333 n=5+5) RegexpMatchEasy0_1K-8 205ns ± 1% 206ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchEasy1_32-8 85.1ns ± 1% 85.5ns ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 365ns ± 1% 371ns ± 9% ~ (p=1.000 n=5+5) RegexpMatchMedium_32-8 129ns ± 2% 128ns ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 39.8µs ± 0% 39.7µs ± 4% ~ (p=0.730 n=4+5) RegexpMatchHard_32-8 1.99µs ± 3% 2.05µs ±16% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 59.3µs ± 1% 60.3µs ± 7% ~ (p=1.000 n=5+5) Revcomp-8 1.36s ±63% 0.52s ± 5% ~ (p=0.095 n=5+5) Template-8 62.6ms ±14% 60.5ms ± 5% ~ (p=0.690 n=5+5) TimeParse-8 330ns ± 2% 324ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 350ns ± 3% 340ns ± 1% -2.86% (p=0.008 n=5+5) name old speed new speed delta GobDecode-8 103MB/s ±11% 108MB/s ± 2% ~ (p=0.222 n=5+5) GobEncode-8 123MB/s ± 1% 127MB/s ± 2% +3.71% (p=0.008 n=5+5) Gzip-8 77.1MB/s ± 4% 76.9MB/s ± 3% ~ (p=1.000 n=5+5) Gunzip-8 505MB/s ± 3% 503MB/s ± 2% ~ (p=0.690 n=5+5) JSONEncode-8 118MB/s ± 3% 116MB/s ± 3% ~ (p=0.421 n=5+5) JSONDecode-8 35.5MB/s ± 1% 35.8MB/s ± 2% ~ (p=0.397 n=5+5) GoParse-8 16.9MB/s ± 1% 17.4MB/s ± 2% +3.45% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 363MB/s ± 3% 358MB/s ± 2% ~ (p=0.421 n=5+5) RegexpMatchEasy0_1K-8 4.98GB/s ± 1% 4.97GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 376MB/s ± 1% 375MB/s ± 5% ~ (p=0.690 n=5+5) RegexpMatchEasy1_1K-8 2.80GB/s ± 1% 2.76GB/s ± 9% ~ (p=0.841 n=5+5) RegexpMatchMedium_32-8 7.73MB/s ± 1% 7.76MB/s ± 3% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 25.8MB/s ± 0% 25.8MB/s ± 4% ~ (p=0.651 n=4+5) RegexpMatchHard_32-8 16.1MB/s ± 3% 15.7MB/s ±14% ~ (p=0.794 n=5+5) RegexpMatchHard_1K-8 17.3MB/s ± 1% 17.0MB/s ± 7% ~ (p=0.984 n=5+5) Revcomp-8 273MB/s ±83% 488MB/s ± 5% ~ (p=0.095 n=5+5) Template-8 31.1MB/s ±13% 32.1MB/s ± 5% ~ (p=0.690 n=5+5) Updates #19495 Change-Id: I116e9a2a4594769318b22d736464de8a98499909 Reviewed-on: https://go-review.googlesource.com/38091 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-12 14:47:59 -07:00
func mapassign_fast32(mapType *byte, hmap map[any]any, key any) (val *any)
func mapassign_fast64(mapType *byte, hmap map[any]any, key any) (val *any)
func mapassign_faststr(mapType *byte, hmap map[any]any, key any) (val *any)
func mapiterinit(mapType *byte, hmap map[any]any, hiter *any)
func mapdelete(mapType *byte, hmap map[any]any, key *any)
runtime: add mapdelete_fast* Add benchmarks for map delete with int32/int64/string key Benchmark results on darwin/amd64 name old time/op new time/op delta MapDelete/Int32/1-8 151ns ± 8% 99ns ± 3% -34.39% (p=0.008 n=5+5) MapDelete/Int32/2-8 128ns ± 2% 111ns ±15% -13.40% (p=0.040 n=5+5) MapDelete/Int32/4-8 128ns ± 5% 114ns ± 2% -10.82% (p=0.008 n=5+5) MapDelete/Int64/1-8 144ns ± 0% 104ns ± 3% -27.53% (p=0.016 n=4+5) MapDelete/Int64/2-8 153ns ± 1% 126ns ± 3% -17.17% (p=0.008 n=5+5) MapDelete/Int64/4-8 178ns ± 3% 136ns ± 2% -23.60% (p=0.008 n=5+5) MapDelete/Str/1-8 187ns ± 3% 171ns ± 3% -8.54% (p=0.008 n=5+5) MapDelete/Str/2-8 221ns ± 3% 206ns ± 4% -7.18% (p=0.016 n=5+4) MapDelete/Str/4-8 256ns ± 5% 232ns ± 2% -9.36% (p=0.016 n=4+5) name old time/op new time/op delta BinaryTree17-8 2.78s ± 7% 2.70s ± 1% ~ (p=0.151 n=5+5) Fannkuch11-8 3.21s ± 2% 3.19s ± 1% ~ (p=0.310 n=5+5) FmtFprintfEmpty-8 49.1ns ± 3% 50.2ns ± 2% ~ (p=0.095 n=5+5) FmtFprintfString-8 78.6ns ± 4% 80.2ns ± 5% ~ (p=0.460 n=5+5) FmtFprintfInt-8 79.7ns ± 1% 81.0ns ± 3% ~ (p=0.103 n=5+5) FmtFprintfIntInt-8 117ns ± 2% 119ns ± 0% ~ (p=0.079 n=5+4) FmtFprintfPrefixedInt-8 153ns ± 1% 146ns ± 3% -4.19% (p=0.024 n=5+5) FmtFprintfFloat-8 239ns ± 1% 237ns ± 1% ~ (p=0.246 n=5+5) FmtManyArgs-8 506ns ± 2% 509ns ± 2% ~ (p=0.238 n=5+5) GobDecode-8 7.06ms ± 4% 6.86ms ± 1% ~ (p=0.222 n=5+5) GobEncode-8 6.01ms ± 5% 5.87ms ± 2% ~ (p=0.222 n=5+5) Gzip-8 246ms ± 4% 236ms ± 1% -4.12% (p=0.008 n=5+5) Gunzip-8 37.7ms ± 4% 37.3ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-8 64.9µs ± 1% 64.4µs ± 0% -0.80% (p=0.032 n=5+4) JSONEncode-8 16.0ms ± 2% 16.2ms ±11% ~ (p=0.548 n=5+5) JSONDecode-8 53.2ms ± 2% 53.1ms ± 4% ~ (p=1.000 n=5+5) Mandelbrot200-8 4.33ms ± 2% 4.32ms ± 2% ~ (p=0.841 n=5+5) GoParse-8 3.24ms ± 2% 3.27ms ± 4% ~ (p=0.690 n=5+5) RegexpMatchEasy0_32-8 86.2ns ± 1% 85.2ns ± 3% ~ (p=0.286 n=5+5) RegexpMatchEasy0_1K-8 198ns ± 2% 199ns ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_32-8 82.6ns ± 2% 81.8ns ± 1% ~ (p=0.294 n=5+5) RegexpMatchEasy1_1K-8 359ns ± 2% 354ns ± 1% -1.39% (p=0.048 n=5+5) RegexpMatchMedium_32-8 123ns ± 2% 123ns ± 1% ~ (p=0.905 n=5+5) RegexpMatchMedium_1K-8 38.2µs ± 2% 38.6µs ± 8% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 1.92µs ± 2% 1.91µs ± 5% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-8 57.6µs ± 1% 57.0µs ± 2% ~ (p=0.310 n=5+5) Revcomp-8 483ms ± 7% 441ms ± 1% -8.79% (p=0.016 n=5+4) Template-8 58.0ms ± 1% 58.2ms ± 7% ~ (p=0.310 n=5+5) TimeParse-8 324ns ± 6% 312ns ± 2% ~ (p=0.087 n=5+5) TimeFormat-8 330ns ± 1% 329ns ± 1% ~ (p=0.968 n=5+5) name old speed new speed delta GobDecode-8 109MB/s ± 4% 112MB/s ± 1% ~ (p=0.222 n=5+5) GobEncode-8 128MB/s ± 5% 131MB/s ± 2% ~ (p=0.222 n=5+5) Gzip-8 78.9MB/s ± 4% 82.3MB/s ± 1% +4.25% (p=0.008 n=5+5) Gunzip-8 514MB/s ± 4% 521MB/s ± 1% ~ (p=0.841 n=5+5) JSONEncode-8 121MB/s ± 2% 120MB/s ±10% ~ (p=0.548 n=5+5) JSONDecode-8 36.5MB/s ± 2% 36.6MB/s ± 4% ~ (p=1.000 n=5+5) GoParse-8 17.9MB/s ± 2% 17.7MB/s ± 4% ~ (p=0.730 n=5+5) RegexpMatchEasy0_32-8 371MB/s ± 1% 375MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy0_1K-8 5.15GB/s ± 1% 5.13GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_32-8 387MB/s ± 2% 391MB/s ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K-8 2.85GB/s ± 2% 2.89GB/s ± 1% ~ (p=0.056 n=5+5) RegexpMatchMedium_32-8 8.07MB/s ± 2% 8.06MB/s ± 1% ~ (p=0.730 n=5+5) RegexpMatchMedium_1K-8 26.8MB/s ± 2% 26.6MB/s ± 7% ~ (p=0.690 n=5+5) RegexpMatchHard_32-8 16.7MB/s ± 2% 16.7MB/s ± 5% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-8 17.8MB/s ± 1% 18.0MB/s ± 2% ~ (p=0.310 n=5+5) Revcomp-8 527MB/s ± 6% 577MB/s ± 1% +9.44% (p=0.016 n=5+4) Template-8 33.5MB/s ± 1% 33.4MB/s ± 7% ~ (p=0.310 n=5+5) Updates #19495 Change-Id: Ib9ece1690813d9b4788455db43d30891e2138df5 Reviewed-on: https://go-review.googlesource.com/38172 Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-14 11:11:28 -07:00
func mapdelete_fast32(mapType *byte, hmap map[any]any, key any)
func mapdelete_fast64(mapType *byte, hmap map[any]any, key any)
func mapdelete_faststr(mapType *byte, hmap map[any]any, key any)
func mapiternext(hiter *any)
// *byte is really *runtime.Type
cmd/compile: generate makechan calls with int arguments Where possible generate calls to runtime makechan with int arguments during compile time instead of makechan with int64 arguments. This eliminates converting arguments for calls to makechan with int64 arguments for platforms where int64 values do not fit into arguments of type int. A similar optimization for makeslice was introduced in CL golang.org/cl/27851. 386: name old time/op new time/op delta MakeChan/Byte 52.4ns ± 6% 45.0ns ± 1% -14.14% (p=0.000 n=10+10) MakeChan/Int 54.5ns ± 1% 49.1ns ± 1% -9.87% (p=0.000 n=10+10) MakeChan/Ptr 150ns ± 1% 143ns ± 0% -4.38% (p=0.000 n=9+7) MakeChan/Struct/0 49.2ns ± 2% 43.2ns ± 2% -12.27% (p=0.000 n=10+10) MakeChan/Struct/32 81.7ns ± 2% 76.2ns ± 1% -6.71% (p=0.000 n=10+10) MakeChan/Struct/40 88.4ns ± 2% 82.5ns ± 2% -6.60% (p=0.000 n=10+10) AMD64: name old time/op new time/op delta MakeChan/Byte 83.4ns ± 8% 80.8ns ± 3% ~ (p=0.171 n=10+10) MakeChan/Int 101ns ± 3% 101ns ± 2% ~ (p=0.412 n=10+10) MakeChan/Ptr 128ns ± 1% 128ns ± 1% ~ (p=0.191 n=10+10) MakeChan/Struct/0 67.6ns ± 3% 68.7ns ± 4% ~ (p=0.224 n=10+10) MakeChan/Struct/32 138ns ± 1% 139ns ± 1% ~ (p=0.185 n=10+9) MakeChan/Struct/40 154ns ± 1% 154ns ± 1% -0.55% (p=0.027 n=10+9) Change-Id: Ie854cb066007232c5e9f71ea7d6fe27e81a9c050 Reviewed-on: https://go-review.googlesource.com/55140 Run-TryBot: Martin Möhrmann <moehrmann@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-08-13 20:03:02 +02:00
func makechan64(chanType *byte, size int64) (hchan chan any)
func makechan(chanType *byte, size int) (hchan chan any)
func chanrecv1(hchan <-chan any, elem *any)
func chanrecv2(hchan <-chan any, elem *any) bool
func chansend1(hchan chan<- any, elem *any)
func closechan(hchan any)
var writeBarrier struct {
enabled bool
pad [3]byte
needed bool
cgo bool
alignme uint64
}
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr I believe the benchmarks that get slower are under register pressure, and not making the call unconditionally makes the pressure worse, and the register allocator doesn't do a great job. But part of the point of this sequence is to get the write barriers out of the way so I can work on the register allocator, so that's okay. name old new delta BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~ BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~ BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~ BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00% BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88% BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84% BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26% BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37% BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~ BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54% BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~ BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~ BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~ BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~ BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~ BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~ BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23% BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~ BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~ BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27% BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65% BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17% BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55% BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38% BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~ BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~ BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~ BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30% BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31% BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~ Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2 Reviewed-on: https://go-review.googlesource.com/9224 Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
func writebarrierptr(dst *any, src any)
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr I believe the benchmarks that get slower are under register pressure, and not making the call unconditionally makes the pressure worse, and the register allocator doesn't do a great job. But part of the point of this sequence is to get the write barriers out of the way so I can work on the register allocator, so that's okay. name old new delta BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~ BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~ BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~ BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00% BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88% BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84% BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26% BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37% BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~ BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54% BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~ BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~ BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~ BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~ BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~ BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~ BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23% BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~ BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~ BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27% BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65% BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17% BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55% BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38% BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~ BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~ BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~ BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30% BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31% BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~ Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2 Reviewed-on: https://go-review.googlesource.com/9224 Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
// *byte is really *runtime.Type
func typedmemmove(typ *byte, dst *any, src *any)
func typedmemclr(typ *byte, dst *any)
func typedslicecopy(typ *byte, dst any, src any) int
func selectnbsend(hchan chan<- any, elem *any) bool
func selectnbrecv(elem *any, hchan <-chan any) bool
func selectnbrecv2(elem *any, received *bool, hchan <-chan any) bool
func newselect(sel *byte, selsize int64, size int32)
func selectsend(sel *byte, hchan chan<- any, elem *any)
func selectrecv(sel *byte, hchan <-chan any, elem *any, received *bool)
func selectdefault(sel *byte)
func selectgo(sel *byte) int
func block()
func makeslice(typ *byte, len int, cap int) (ary []any)
func makeslice64(typ *byte, len int64, cap int64) (ary []any)
func growslice(typ *byte, old []any, cap int) (ary []any)
func memmove(to *any, frm *any, length uintptr)
func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
func memclrHasPointers(ptr unsafe.Pointer, n uintptr)
func memequal(x, y *any, size uintptr) bool
func memequal8(x, y *any) bool
func memequal16(x, y *any) bool
func memequal32(x, y *any) bool
func memequal64(x, y *any) bool
func memequal128(x, y *any) bool
// only used on 32-bit
func int64div(int64, int64) int64
func uint64div(uint64, uint64) uint64
func int64mod(int64, int64) int64
func uint64mod(uint64, uint64) uint64
func float64toint64(float64) int64
func float64touint64(float64) uint64
func float64touint32(float64) uint32
func int64tofloat64(int64) float64
func uint64tofloat64(uint64) float64
func uint32tofloat64(uint32) float64
func complex128div(num complex128, den complex128) (quo complex128)
// race detection
func racefuncenter(uintptr)
func racefuncexit()
func raceread(uintptr)
func racewrite(uintptr)
func racereadrange(addr, size uintptr)
func racewriterange(addr, size uintptr)
// memory sanitizer
func msanread(addr, size uintptr)
func msanwrite(addr, size uintptr)
// architecture variants
var support_popcnt bool
var support_sse41 bool