go/src/runtime/slice.go

217 lines
6 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import (
"unsafe"
)
type slice struct {
array unsafe.Pointer
len int
cap int
}
// maxElems is a lookup table containing the maximum capacity for a slice.
// The index is the size of the slice element.
var maxElems = [...]uintptr{
^uintptr(0),
_MaxMem / 1, _MaxMem / 2, _MaxMem / 3, _MaxMem / 4,
_MaxMem / 5, _MaxMem / 6, _MaxMem / 7, _MaxMem / 8,
_MaxMem / 9, _MaxMem / 10, _MaxMem / 11, _MaxMem / 12,
_MaxMem / 13, _MaxMem / 14, _MaxMem / 15, _MaxMem / 16,
_MaxMem / 17, _MaxMem / 18, _MaxMem / 19, _MaxMem / 20,
_MaxMem / 21, _MaxMem / 22, _MaxMem / 23, _MaxMem / 24,
_MaxMem / 25, _MaxMem / 26, _MaxMem / 27, _MaxMem / 28,
_MaxMem / 29, _MaxMem / 30, _MaxMem / 31, _MaxMem / 32,
}
// maxSliceCap returns the maximum capacity for a slice.
func maxSliceCap(elemsize uintptr) uintptr {
if elemsize < uintptr(len(maxElems)) {
return maxElems[elemsize]
}
return _MaxMem / elemsize
}
func makeslice(et *_type, len, cap int) slice {
// NOTE: The len > maxElements check here is not strictly necessary,
// but it produces a 'len out of range' error instead of a 'cap out of range' error
// when someone does make([]T, bignumber). 'cap out of range' is true too,
// but since the cap is only being supplied implicitly, saying len is clearer.
// See issue 4085.
maxElements := maxSliceCap(et.size)
if len < 0 || uintptr(len) > maxElements {
panic(errorString("makeslice: len out of range"))
}
if cap < len || uintptr(cap) > maxElements {
panic(errorString("makeslice: cap out of range"))
}
p := mallocgc(et.size*uintptr(cap), et, true)
return slice{p, len, cap}
}
func makeslice64(et *_type, len64, cap64 int64) slice {
len := int(len64)
if int64(len) != len64 {
panic(errorString("makeslice: len out of range"))
}
cap := int(cap64)
if int64(cap) != cap64 {
panic(errorString("makeslice: cap out of range"))
}
return makeslice(et, len, cap)
}
// growslice handles slice growth during append.
// It is passed the slice element type, the old slice, and the desired new minimum capacity,
// and it returns a new slice with at least that capacity, with the old data
// copied into it.
cmd/compile: avoid a spill in append fast path Instead of spilling newlen, recalculate it. This removes a spill from the fast path, at the cost of a cheap recalculation on the (rare) growth path. This uses 8 bytes less of stack space. It generates two more bytes of code, but that is due to suboptimal register allocation; see far below. Runtime append microbenchmarks are all over the map, presumably due to incidental code movement. Sample code: func s(b []byte) []byte { b = append(b, 1, 2, 3) return b } Before: "".s t=1 size=160 args=0x30 locals=0x48 0x0000 00000 (append.go:8) TEXT "".s(SB), $72-48 0x0000 00000 (append.go:8) MOVQ (TLS), CX 0x0009 00009 (append.go:8) CMPQ SP, 16(CX) 0x000d 00013 (append.go:8) JLS 149 0x0013 00019 (append.go:8) SUBQ $72, SP 0x0017 00023 (append.go:8) FUNCDATA $0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB) 0x0017 00023 (append.go:8) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:9) MOVQ "".b+88(FP), CX 0x001c 00028 (append.go:9) LEAQ 3(CX), DX 0x0020 00032 (append.go:9) MOVQ DX, "".autotmp_0+64(SP) 0x0025 00037 (append.go:9) MOVQ "".b+96(FP), BX 0x002a 00042 (append.go:9) CMPQ DX, BX 0x002d 00045 (append.go:9) JGT $0, 86 0x002f 00047 (append.go:8) MOVQ "".b+80(FP), AX 0x0034 00052 (append.go:9) MOVB $1, (AX)(CX*1) 0x0038 00056 (append.go:9) MOVB $2, 1(AX)(CX*1) 0x003d 00061 (append.go:9) MOVB $3, 2(AX)(CX*1) 0x0042 00066 (append.go:10) MOVQ AX, "".~r1+104(FP) 0x0047 00071 (append.go:10) MOVQ DX, "".~r1+112(FP) 0x004c 00076 (append.go:10) MOVQ BX, "".~r1+120(FP) 0x0051 00081 (append.go:10) ADDQ $72, SP 0x0055 00085 (append.go:10) RET 0x0056 00086 (append.go:9) LEAQ type.[]uint8(SB), AX 0x005d 00093 (append.go:9) MOVQ AX, (SP) 0x0061 00097 (append.go:9) MOVQ "".b+80(FP), BP 0x0066 00102 (append.go:9) MOVQ BP, 8(SP) 0x006b 00107 (append.go:9) MOVQ CX, 16(SP) 0x0070 00112 (append.go:9) MOVQ BX, 24(SP) 0x0075 00117 (append.go:9) MOVQ DX, 32(SP) 0x007a 00122 (append.go:9) PCDATA $0, $0 0x007a 00122 (append.go:9) CALL runtime.growslice(SB) 0x007f 00127 (append.go:9) MOVQ 40(SP), AX 0x0084 00132 (append.go:9) MOVQ 56(SP), BX 0x0089 00137 (append.go:8) MOVQ "".b+88(FP), CX 0x008e 00142 (append.go:9) MOVQ "".autotmp_0+64(SP), DX 0x0093 00147 (append.go:9) JMP 52 0x0095 00149 (append.go:9) NOP 0x0095 00149 (append.go:8) CALL runtime.morestack_noctxt(SB) 0x009a 00154 (append.go:8) JMP 0 After: "".s t=1 size=176 args=0x30 locals=0x40 0x0000 00000 (append.go:8) TEXT "".s(SB), $64-48 0x0000 00000 (append.go:8) MOVQ (TLS), CX 0x0009 00009 (append.go:8) CMPQ SP, 16(CX) 0x000d 00013 (append.go:8) JLS 151 0x0013 00019 (append.go:8) SUBQ $64, SP 0x0017 00023 (append.go:8) FUNCDATA $0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB) 0x0017 00023 (append.go:8) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:9) MOVQ "".b+80(FP), CX 0x001c 00028 (append.go:9) LEAQ 3(CX), DX 0x0020 00032 (append.go:9) MOVQ "".b+88(FP), BX 0x0025 00037 (append.go:9) CMPQ DX, BX 0x0028 00040 (append.go:9) JGT $0, 81 0x002a 00042 (append.go:8) MOVQ "".b+72(FP), AX 0x002f 00047 (append.go:9) MOVB $1, (AX)(CX*1) 0x0033 00051 (append.go:9) MOVB $2, 1(AX)(CX*1) 0x0038 00056 (append.go:9) MOVB $3, 2(AX)(CX*1) 0x003d 00061 (append.go:10) MOVQ AX, "".~r1+96(FP) 0x0042 00066 (append.go:10) MOVQ DX, "".~r1+104(FP) 0x0047 00071 (append.go:10) MOVQ BX, "".~r1+112(FP) 0x004c 00076 (append.go:10) ADDQ $64, SP 0x0050 00080 (append.go:10) RET 0x0051 00081 (append.go:9) LEAQ type.[]uint8(SB), AX 0x0058 00088 (append.go:9) MOVQ AX, (SP) 0x005c 00092 (append.go:9) MOVQ "".b+72(FP), BP 0x0061 00097 (append.go:9) MOVQ BP, 8(SP) 0x0066 00102 (append.go:9) MOVQ CX, 16(SP) 0x006b 00107 (append.go:9) MOVQ BX, 24(SP) 0x0070 00112 (append.go:9) MOVQ DX, 32(SP) 0x0075 00117 (append.go:9) PCDATA $0, $0 0x0075 00117 (append.go:9) CALL runtime.growslice(SB) 0x007a 00122 (append.go:9) MOVQ 40(SP), AX 0x007f 00127 (append.go:9) MOVQ 48(SP), CX 0x0084 00132 (append.go:9) MOVQ 56(SP), BX 0x0089 00137 (append.go:9) ADDQ $3, CX 0x008d 00141 (append.go:9) MOVQ CX, DX 0x0090 00144 (append.go:8) MOVQ "".b+80(FP), CX 0x0095 00149 (append.go:9) JMP 47 0x0097 00151 (append.go:9) NOP 0x0097 00151 (append.go:8) CALL runtime.morestack_noctxt(SB) 0x009c 00156 (append.go:8) JMP 0 Observe that in the following sequence, we should use DX directly instead of using CX as a temporary register, which would make the new code a strict improvement on the old: 0x007f 00127 (append.go:9) MOVQ 48(SP), CX 0x0084 00132 (append.go:9) MOVQ 56(SP), BX 0x0089 00137 (append.go:9) ADDQ $3, CX 0x008d 00141 (append.go:9) MOVQ CX, DX 0x0090 00144 (append.go:8) MOVQ "".b+80(FP), CX Change-Id: I4ee50b18fa53865901d2d7f86c2cbb54c6fa6924 Reviewed-on: https://go-review.googlesource.com/21812 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-04-10 09:08:00 -07:00
// The new slice's length is set to the old slice's length,
// NOT to the new requested capacity.
// This is for codegen convenience. The old slice's length is used immediately
// to calculate where to write new values during an append.
// TODO: When the old backend is gone, reconsider this decision.
// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
func growslice(et *_type, old slice, cap int) slice {
if raceenabled {
callerpc := getcallerpc(unsafe.Pointer(&et))
racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice))
}
if msanenabled {
msanread(old.array, uintptr(old.len*int(et.size)))
}
if et.size == 0 {
if cap < old.cap {
panic(errorString("growslice: cap out of range"))
}
// append should not create a slice with nil pointer but non-zero len.
// We assume that append doesn't need to preserve old.array in this case.
return slice{unsafe.Pointer(&zerobase), old.len, cap}
}
newcap := old.cap
doublecap := newcap + newcap
if cap > doublecap {
newcap = cap
} else {
if old.len < 1024 {
newcap = doublecap
} else {
for newcap < cap {
newcap += newcap / 4
}
}
}
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
var lenmem, newlenmem, capmem uintptr
const ptrSize = unsafe.Sizeof((*byte)(nil))
switch et.size {
case 1:
lenmem = uintptr(old.len)
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
newlenmem = uintptr(cap)
capmem = roundupsize(uintptr(newcap))
newcap = int(capmem)
case ptrSize:
lenmem = uintptr(old.len) * ptrSize
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
newlenmem = uintptr(cap) * ptrSize
capmem = roundupsize(uintptr(newcap) * ptrSize)
newcap = int(capmem / ptrSize)
default:
lenmem = uintptr(old.len) * et.size
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
newlenmem = uintptr(cap) * et.size
capmem = roundupsize(uintptr(newcap) * et.size)
newcap = int(capmem / et.size)
}
if cap < old.cap || uintptr(newcap) > maxSliceCap(et.size) {
panic(errorString("growslice: cap out of range"))
}
var p unsafe.Pointer
if et.kind&kindNoPointers != 0 {
p = mallocgc(capmem, nil, false)
memmove(p, old.array, lenmem)
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
// The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
// Only clear the part that will not be overwritten.
memclr(add(p, newlenmem), capmem-newlenmem)
} else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
p = mallocgc(capmem, et, true)
if !writeBarrier.enabled {
runtime: use memmove during slice append The effect of this CL: name old mean new mean delta BinaryTree17 5.97s × (0.96,1.04) 5.95s × (0.98,1.02) ~ (p=0.697) Fannkuch11 4.39s × (1.00,1.01) 4.41s × (1.00,1.01) +0.52% (p=0.015) FmtFprintfEmpty 90.8ns × (0.97,1.05) 89.4ns × (0.94,1.13) ~ (p=0.571) FmtFprintfString 305ns × (0.99,1.01) 292ns × (0.98,1.05) -4.35% (p=0.000) FmtFprintfInt 278ns × (0.96,1.03) 279ns × (0.98,1.04) ~ (p=0.741) FmtFprintfIntInt 489ns × (0.99,1.02) 482ns × (0.98,1.03) -1.43% (p=0.024) FmtFprintfPrefixedInt 402ns × (0.98,1.02) 395ns × (0.98,1.03) -1.67% (p=0.014) FmtFprintfFloat 578ns × (1.00,1.00) 569ns × (0.99,1.01) -1.48% (p=0.000) FmtManyArgs 1.88µs × (0.99,1.01) 1.88µs × (1.00,1.01) ~ (p=0.055) GobDecode 15.3ms × (0.99,1.01) 15.2ms × (1.00,1.01) -0.61% (p=0.007) GobEncode 11.8ms × (0.98,1.05) 11.6ms × (0.99,1.01) ~ (p=0.075) Gzip 647ms × (0.99,1.01) 647ms × (1.00,1.00) ~ (p=0.790) Gunzip 143ms × (1.00,1.00) 142ms × (1.00,1.00) ~ (p=0.370) HTTPClientServer 91.2µs × (0.99,1.01) 91.7µs × (0.99,1.02) ~ (p=0.233) JSONEncode 31.5ms × (0.98,1.01) 31.8ms × (0.99,1.02) +1.09% (p=0.015) JSONDecode 110ms × (0.99,1.01) 110ms × (0.99,1.02) ~ (p=0.577) Mandelbrot200 6.00ms × (1.00,1.00) 6.02ms × (1.00,1.00) +0.24% (p=0.001) GoParse 6.68ms × (0.98,1.02) 6.61ms × (0.99,1.01) -1.10% (p=0.027) RegexpMatchEasy0_32 162ns × (1.00,1.00) 161ns × (1.00,1.01) -0.66% (p=0.001) RegexpMatchEasy0_1K 539ns × (1.00,1.00) 539ns × (0.99,1.01) ~ (p=0.509) RegexpMatchEasy1_32 140ns × (0.99,1.02) 139ns × (0.99,1.02) ~ (p=0.163) RegexpMatchEasy1_1K 886ns × (1.00,1.00) 887ns × (1.00,1.00) ~ (p=0.408) RegexpMatchMedium_32 252ns × (1.00,1.00) 255ns × (0.99,1.01) +1.01% (p=0.000) RegexpMatchMedium_1K 72.6µs × (1.00,1.00) 72.6µs × (1.00,1.00) ~ (p=0.176) RegexpMatchHard_32 3.84µs × (1.00,1.00) 3.84µs × (1.00,1.00) ~ (p=0.403) RegexpMatchHard_1K 117µs × (1.00,1.00) 117µs × (1.00,1.00) ~ (p=0.351) Revcomp 926ms × (0.99,1.01) 925ms × (0.99,1.01) ~ (p=0.541) Template 126ms × (0.99,1.02) 130ms × (0.99,1.01) +3.42% (p=0.000) TimeParse 632ns × (0.99,1.01) 626ns × (1.00,1.00) -0.88% (p=0.000) TimeFormat 658ns × (0.99,1.01) 662ns × (0.99,1.02) ~ (p=0.111) The effect of this CL combined with CL 9886: name old mean new mean delta BinaryTree17 5.90s × (0.98,1.03) 5.95s × (0.98,1.02) ~ (p=0.175) Fannkuch11 4.34s × (1.00,1.00) 4.41s × (1.00,1.01) +1.69% (p=0.000) FmtFprintfEmpty 87.3ns × (0.97,1.17) 89.4ns × (0.94,1.13) ~ (p=0.499) FmtFprintfString 288ns × (0.98,1.04) 292ns × (0.98,1.05) ~ (p=0.292) FmtFprintfInt 290ns × (0.98,1.05) 279ns × (0.98,1.04) -3.76% (p=0.001) FmtFprintfIntInt 493ns × (0.98,1.04) 482ns × (0.98,1.03) -2.27% (p=0.017) FmtFprintfPrefixedInt 399ns × (0.98,1.02) 395ns × (0.98,1.03) ~ (p=0.159) FmtFprintfFloat 569ns × (1.00,1.00) 569ns × (0.99,1.01) ~ (p=0.847) FmtManyArgs 1.90µs × (0.99,1.03) 1.88µs × (1.00,1.01) -1.14% (p=0.009) GobDecode 15.2ms × (1.00,1.01) 15.2ms × (1.00,1.01) ~ (p=0.170) GobEncode 11.8ms × (0.99,1.02) 11.6ms × (0.99,1.01) -1.47% (p=0.003) Gzip 649ms × (0.99,1.00) 647ms × (1.00,1.00) ~ (p=0.200) Gunzip 144ms × (0.99,1.01) 142ms × (1.00,1.00) -1.04% (p=0.000) HTTPClientServer 91.1µs × (0.98,1.03) 91.7µs × (0.99,1.02) ~ (p=0.345) JSONEncode 31.5ms × (0.99,1.01) 31.8ms × (0.99,1.02) +0.98% (p=0.021) JSONDecode 110ms × (1.00,1.01) 110ms × (0.99,1.02) ~ (p=0.259) Mandelbrot200 6.02ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.500) GoParse 6.68ms × (1.00,1.01) 6.61ms × (0.99,1.01) -1.17% (p=0.001) RegexpMatchEasy0_32 161ns × (1.00,1.00) 161ns × (1.00,1.01) -0.39% (p=0.033) RegexpMatchEasy0_1K 539ns × (1.00,1.00) 539ns × (0.99,1.01) ~ (p=0.445) RegexpMatchEasy1_32 138ns × (1.00,1.01) 139ns × (0.99,1.02) ~ (p=0.281) RegexpMatchEasy1_1K 887ns × (1.00,1.01) 887ns × (1.00,1.00) ~ (p=0.610) RegexpMatchMedium_32 251ns × (1.00,1.02) 255ns × (0.99,1.01) +1.42% (p=0.000) RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 72.6µs × (1.00,1.00) ~ (p=0.097) RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.84µs × (1.00,1.00) -0.31% (p=0.000) RegexpMatchHard_1K 117µs × (1.00,1.00) 117µs × (1.00,1.00) ~ (p=0.704) Revcomp 923ms × (0.98,1.02) 925ms × (0.99,1.01) ~ (p=0.574) Template 126ms × (0.98,1.03) 130ms × (0.99,1.01) +3.28% (p=0.000) TimeParse 631ns × (0.99,1.02) 626ns × (1.00,1.00) ~ (p=0.053) TimeFormat 660ns × (0.99,1.01) 662ns × (0.99,1.02) ~ (p=0.398) Change-Id: I59c03d329fe7bc178a31477c6f1f01062b881041 Reviewed-on: https://go-review.googlesource.com/9993 Reviewed-by: Austin Clements <austin@google.com>
2015-05-13 10:48:05 -04:00
memmove(p, old.array, lenmem)
} else {
for i := uintptr(0); i < lenmem; i += et.size {
typedmemmove(et, add(p, i), add(old.array, i))
}
}
}
return slice{p, old.len, newcap}
}
func slicecopy(to, fm slice, width uintptr) int {
if fm.len == 0 || to.len == 0 {
return 0
}
n := fm.len
if to.len < n {
n = to.len
}
if width == 0 {
return n
}
if raceenabled {
callerpc := getcallerpc(unsafe.Pointer(&to))
pc := funcPC(slicecopy)
racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc)
racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc)
}
if msanenabled {
msanwrite(to.array, uintptr(n*int(width)))
msanread(fm.array, uintptr(n*int(width)))
}
size := uintptr(n) * width
if size == 1 { // common case worth about 2x to do here
// TODO: is this still worth it with new memmove impl?
*(*byte)(to.array) = *(*byte)(fm.array) // known to be a byte pointer
} else {
memmove(to.array, fm.array, size)
}
return n
}
func slicestringcopy(to []byte, fm string) int {
if len(fm) == 0 || len(to) == 0 {
return 0
}
n := len(fm)
if len(to) < n {
n = len(to)
}
if raceenabled {
callerpc := getcallerpc(unsafe.Pointer(&to))
pc := funcPC(slicestringcopy)
racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc)
}
if msanenabled {
msanwrite(unsafe.Pointer(&to[0]), uintptr(n))
}
memmove(unsafe.Pointer(&to[0]), stringStructOf(&fm).str, uintptr(n))
return n
}