go/src/runtime/slice.go

274 lines
7.6 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
type slice struct {
array unsafe.Pointer
len int
cap int
}
// An notInHeapSlice is a slice backed by go:notinheap memory.
type notInHeapSlice struct {
array *notInHeap
len int
cap int
}
// maxElems is a lookup table containing the maximum capacity for a slice.
// The index is the size of the slice element.
var maxElems = [...]uintptr{
^uintptr(0),
maxAlloc / 1, maxAlloc / 2, maxAlloc / 3, maxAlloc / 4,
maxAlloc / 5, maxAlloc / 6, maxAlloc / 7, maxAlloc / 8,
maxAlloc / 9, maxAlloc / 10, maxAlloc / 11, maxAlloc / 12,
maxAlloc / 13, maxAlloc / 14, maxAlloc / 15, maxAlloc / 16,
maxAlloc / 17, maxAlloc / 18, maxAlloc / 19, maxAlloc / 20,
maxAlloc / 21, maxAlloc / 22, maxAlloc / 23, maxAlloc / 24,
maxAlloc / 25, maxAlloc / 26, maxAlloc / 27, maxAlloc / 28,
maxAlloc / 29, maxAlloc / 30, maxAlloc / 31, maxAlloc / 32,
}
// maxSliceCap returns the maximum capacity for a slice.
func maxSliceCap(elemsize uintptr) uintptr {
if elemsize < uintptr(len(maxElems)) {
return maxElems[elemsize]
}
return maxAlloc / elemsize
}
func panicmakeslicelen() {
panic(errorString("makeslice: len out of range"))
}
func panicmakeslicecap() {
panic(errorString("makeslice: cap out of range"))
}
func makeslice(et *_type, len, cap int) slice {
// NOTE: The len > maxElements check here is not strictly necessary,
// but it produces a 'len out of range' error instead of a 'cap out of range' error
// when someone does make([]T, bignumber). 'cap out of range' is true too,
// but since the cap is only being supplied implicitly, saying len is clearer.
// See issue 4085.
maxElements := maxSliceCap(et.size)
if len < 0 || uintptr(len) > maxElements {
panicmakeslicelen()
}
if cap < len || uintptr(cap) > maxElements {
panicmakeslicecap()
}
p := mallocgc(et.size*uintptr(cap), et, true)
return slice{p, len, cap}
}
func makeslice64(et *_type, len64, cap64 int64) slice {
len := int(len64)
if int64(len) != len64 {
panicmakeslicelen()
}
cap := int(cap64)
if int64(cap) != cap64 {
panicmakeslicecap()
}
return makeslice(et, len, cap)
}
// growslice handles slice growth during append.
// It is passed the slice element type, the old slice, and the desired new minimum capacity,
// and it returns a new slice with at least that capacity, with the old data
// copied into it.
cmd/compile: avoid a spill in append fast path Instead of spilling newlen, recalculate it. This removes a spill from the fast path, at the cost of a cheap recalculation on the (rare) growth path. This uses 8 bytes less of stack space. It generates two more bytes of code, but that is due to suboptimal register allocation; see far below. Runtime append microbenchmarks are all over the map, presumably due to incidental code movement. Sample code: func s(b []byte) []byte { b = append(b, 1, 2, 3) return b } Before: "".s t=1 size=160 args=0x30 locals=0x48 0x0000 00000 (append.go:8) TEXT "".s(SB), $72-48 0x0000 00000 (append.go:8) MOVQ (TLS), CX 0x0009 00009 (append.go:8) CMPQ SP, 16(CX) 0x000d 00013 (append.go:8) JLS 149 0x0013 00019 (append.go:8) SUBQ $72, SP 0x0017 00023 (append.go:8) FUNCDATA $0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB) 0x0017 00023 (append.go:8) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:9) MOVQ "".b+88(FP), CX 0x001c 00028 (append.go:9) LEAQ 3(CX), DX 0x0020 00032 (append.go:9) MOVQ DX, "".autotmp_0+64(SP) 0x0025 00037 (append.go:9) MOVQ "".b+96(FP), BX 0x002a 00042 (append.go:9) CMPQ DX, BX 0x002d 00045 (append.go:9) JGT $0, 86 0x002f 00047 (append.go:8) MOVQ "".b+80(FP), AX 0x0034 00052 (append.go:9) MOVB $1, (AX)(CX*1) 0x0038 00056 (append.go:9) MOVB $2, 1(AX)(CX*1) 0x003d 00061 (append.go:9) MOVB $3, 2(AX)(CX*1) 0x0042 00066 (append.go:10) MOVQ AX, "".~r1+104(FP) 0x0047 00071 (append.go:10) MOVQ DX, "".~r1+112(FP) 0x004c 00076 (append.go:10) MOVQ BX, "".~r1+120(FP) 0x0051 00081 (append.go:10) ADDQ $72, SP 0x0055 00085 (append.go:10) RET 0x0056 00086 (append.go:9) LEAQ type.[]uint8(SB), AX 0x005d 00093 (append.go:9) MOVQ AX, (SP) 0x0061 00097 (append.go:9) MOVQ "".b+80(FP), BP 0x0066 00102 (append.go:9) MOVQ BP, 8(SP) 0x006b 00107 (append.go:9) MOVQ CX, 16(SP) 0x0070 00112 (append.go:9) MOVQ BX, 24(SP) 0x0075 00117 (append.go:9) MOVQ DX, 32(SP) 0x007a 00122 (append.go:9) PCDATA $0, $0 0x007a 00122 (append.go:9) CALL runtime.growslice(SB) 0x007f 00127 (append.go:9) MOVQ 40(SP), AX 0x0084 00132 (append.go:9) MOVQ 56(SP), BX 0x0089 00137 (append.go:8) MOVQ "".b+88(FP), CX 0x008e 00142 (append.go:9) MOVQ "".autotmp_0+64(SP), DX 0x0093 00147 (append.go:9) JMP 52 0x0095 00149 (append.go:9) NOP 0x0095 00149 (append.go:8) CALL runtime.morestack_noctxt(SB) 0x009a 00154 (append.go:8) JMP 0 After: "".s t=1 size=176 args=0x30 locals=0x40 0x0000 00000 (append.go:8) TEXT "".s(SB), $64-48 0x0000 00000 (append.go:8) MOVQ (TLS), CX 0x0009 00009 (append.go:8) CMPQ SP, 16(CX) 0x000d 00013 (append.go:8) JLS 151 0x0013 00019 (append.go:8) SUBQ $64, SP 0x0017 00023 (append.go:8) FUNCDATA $0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB) 0x0017 00023 (append.go:8) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:9) MOVQ "".b+80(FP), CX 0x001c 00028 (append.go:9) LEAQ 3(CX), DX 0x0020 00032 (append.go:9) MOVQ "".b+88(FP), BX 0x0025 00037 (append.go:9) CMPQ DX, BX 0x0028 00040 (append.go:9) JGT $0, 81 0x002a 00042 (append.go:8) MOVQ "".b+72(FP), AX 0x002f 00047 (append.go:9) MOVB $1, (AX)(CX*1) 0x0033 00051 (append.go:9) MOVB $2, 1(AX)(CX*1) 0x0038 00056 (append.go:9) MOVB $3, 2(AX)(CX*1) 0x003d 00061 (append.go:10) MOVQ AX, "".~r1+96(FP) 0x0042 00066 (append.go:10) MOVQ DX, "".~r1+104(FP) 0x0047 00071 (append.go:10) MOVQ BX, "".~r1+112(FP) 0x004c 00076 (append.go:10) ADDQ $64, SP 0x0050 00080 (append.go:10) RET 0x0051 00081 (append.go:9) LEAQ type.[]uint8(SB), AX 0x0058 00088 (append.go:9) MOVQ AX, (SP) 0x005c 00092 (append.go:9) MOVQ "".b+72(FP), BP 0x0061 00097 (append.go:9) MOVQ BP, 8(SP) 0x0066 00102 (append.go:9) MOVQ CX, 16(SP) 0x006b 00107 (append.go:9) MOVQ BX, 24(SP) 0x0070 00112 (append.go:9) MOVQ DX, 32(SP) 0x0075 00117 (append.go:9) PCDATA $0, $0 0x0075 00117 (append.go:9) CALL runtime.growslice(SB) 0x007a 00122 (append.go:9) MOVQ 40(SP), AX 0x007f 00127 (append.go:9) MOVQ 48(SP), CX 0x0084 00132 (append.go:9) MOVQ 56(SP), BX 0x0089 00137 (append.go:9) ADDQ $3, CX 0x008d 00141 (append.go:9) MOVQ CX, DX 0x0090 00144 (append.go:8) MOVQ "".b+80(FP), CX 0x0095 00149 (append.go:9) JMP 47 0x0097 00151 (append.go:9) NOP 0x0097 00151 (append.go:8) CALL runtime.morestack_noctxt(SB) 0x009c 00156 (append.go:8) JMP 0 Observe that in the following sequence, we should use DX directly instead of using CX as a temporary register, which would make the new code a strict improvement on the old: 0x007f 00127 (append.go:9) MOVQ 48(SP), CX 0x0084 00132 (append.go:9) MOVQ 56(SP), BX 0x0089 00137 (append.go:9) ADDQ $3, CX 0x008d 00141 (append.go:9) MOVQ CX, DX 0x0090 00144 (append.go:8) MOVQ "".b+80(FP), CX Change-Id: I4ee50b18fa53865901d2d7f86c2cbb54c6fa6924 Reviewed-on: https://go-review.googlesource.com/21812 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2016-04-10 09:08:00 -07:00
// The new slice's length is set to the old slice's length,
// NOT to the new requested capacity.
// This is for codegen convenience. The old slice's length is used immediately
// to calculate where to write new values during an append.
// TODO: When the old backend is gone, reconsider this decision.
// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
func growslice(et *_type, old slice, cap int) slice {
if raceenabled {
callerpc := getcallerpc()
racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice))
}
if msanenabled {
msanread(old.array, uintptr(old.len*int(et.size)))
}
if et.size == 0 {
if cap < old.cap {
panic(errorString("growslice: cap out of range"))
}
// append should not create a slice with nil pointer but non-zero len.
// We assume that append doesn't need to preserve old.array in this case.
return slice{unsafe.Pointer(&zerobase), old.len, cap}
}
newcap := old.cap
doublecap := newcap + newcap
if cap > doublecap {
newcap = cap
} else {
if old.len < 1024 {
newcap = doublecap
} else {
// Check 0 < newcap to detect overflow
// and prevent an infinite loop.
for 0 < newcap && newcap < cap {
newcap += newcap / 4
}
// Set newcap to the requested cap when
// the newcap calculation overflowed.
if newcap <= 0 {
newcap = cap
}
}
}
var overflow bool
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
var lenmem, newlenmem, capmem uintptr
// Specialize for common values of et.size.
// For 1 we don't need any division/multiplication.
// For sys.PtrSize, compiler will optimize division/multiplication into a shift by a constant.
// For powers of 2, use a variable shift.
switch {
case et.size == 1:
lenmem = uintptr(old.len)
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
newlenmem = uintptr(cap)
capmem = roundupsize(uintptr(newcap))
overflow = uintptr(newcap) > maxAlloc
newcap = int(capmem)
case et.size == sys.PtrSize:
lenmem = uintptr(old.len) * sys.PtrSize
newlenmem = uintptr(cap) * sys.PtrSize
capmem = roundupsize(uintptr(newcap) * sys.PtrSize)
overflow = uintptr(newcap) > maxAlloc/sys.PtrSize
newcap = int(capmem / sys.PtrSize)
case isPowerOfTwo(et.size):
var shift uintptr
if sys.PtrSize == 8 {
// Mask shift for better code generation.
shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
} else {
shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
}
lenmem = uintptr(old.len) << shift
newlenmem = uintptr(cap) << shift
capmem = roundupsize(uintptr(newcap) << shift)
overflow = uintptr(newcap) > (maxAlloc >> shift)
newcap = int(capmem >> shift)
default:
lenmem = uintptr(old.len) * et.size
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
newlenmem = uintptr(cap) * et.size
capmem = roundupsize(uintptr(newcap) * et.size)
overflow = uintptr(newcap) > maxSliceCap(et.size)
newcap = int(capmem / et.size)
}
// The check of overflow (uintptr(newcap) > maxSliceCap(et.size))
// in addition to capmem > _MaxMem is needed to prevent an overflow
// which can be used to trigger a segfault on 32bit architectures
// with this example program:
//
// type T [1<<27 + 1]int64
//
// var d T
// var s []T
//
// func main() {
// s = append(s, d, d, d, d)
// print(len(s), "\n")
// }
if cap < old.cap || overflow || capmem > maxAlloc {
panic(errorString("growslice: cap out of range"))
}
var p unsafe.Pointer
if et.kind&kindNoPointers != 0 {
p = mallocgc(capmem, nil, false)
runtime: make append only clear uncopied memory Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-10-03 12:45:12 -07:00
// The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
// Only clear the part that will not be overwritten.
memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem)
} else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
p = mallocgc(capmem, et, true)
runtime: replace typedmemmmove with bulkBarrierPreWrite and memmove in growslice A bulkBarrierPreWrite together with a memmove as used in typedslicecopy is faster than a typedmemmove for each element of the old slice that needs to be copied to the new slice. typedslicecopy is not used here as runtime functions should not call other instrumented runtime functions and some conditions like dst == src or the destination slice not being large enought that are checked for in typedslicecopy can not happen in growslice. Append 13.5ns ± 6% 13.3ns ± 3% ~ (p=0.304 n=10+10) AppendGrowByte 1.18ms ± 2% 1.19ms ± 1% ~ (p=0.113 n=10+9) AppendGrowString 123ms ± 1% 73ms ± 1% -40.39% (p=0.000 n=9+8) AppendSlice/1Bytes 3.81ns ± 1% 3.78ns ± 1% ~ (p=0.116 n=10+10) AppendSlice/4Bytes 3.71ns ± 1% 3.70ns ± 0% ~ (p=0.095 n=10+9) AppendSlice/7Bytes 3.73ns ± 0% 3.75ns ± 1% ~ (p=0.442 n=10+10) AppendSlice/8Bytes 4.00ns ± 1% 4.01ns ± 1% ~ (p=0.330 n=10+10) AppendSlice/15Bytes 4.29ns ± 1% 4.28ns ± 1% ~ (p=0.536 n=10+10) AppendSlice/16Bytes 4.28ns ± 1% 4.31ns ± 1% +0.75% (p=0.019 n=10+10) AppendSlice/32Bytes 4.57ns ± 2% 4.58ns ± 2% ~ (p=0.236 n=10+10) AppendSliceLarge/1024Bytes 305ns ± 2% 306ns ± 1% ~ (p=0.236 n=10+10) AppendSliceLarge/4096Bytes 1.06µs ± 1% 1.06µs ± 0% ~ (p=1.000 n=9+10) AppendSliceLarge/16384Bytes 3.12µs ± 2% 3.11µs ± 1% ~ (p=0.493 n=10+10) AppendSliceLarge/65536Bytes 5.61µs ± 5% 5.36µs ± 2% -4.58% (p=0.003 n=10+8) AppendSliceLarge/262144Bytes 21.0µs ± 1% 19.5µs ± 1% -7.09% (p=0.000 n=8+10) AppendSliceLarge/1048576Bytes 78.4µs ± 1% 78.7µs ± 2% ~ (p=0.315 n=8+10) AppendStr/1Bytes 3.96ns ± 6% 3.99ns ± 9% ~ (p=0.591 n=10+10) AppendStr/4Bytes 3.98ns ± 1% 3.99ns ± 1% ~ (p=0.515 n=9+9) AppendStr/8Bytes 4.27ns ± 1% 4.27ns ± 1% ~ (p=0.633 n=10+10) AppendStr/16Bytes 4.56ns ± 2% 4.55ns ± 1% ~ (p=0.869 n=10+10) AppendStr/32Bytes 4.85ns ± 1% 4.89ns ± 1% +0.71% (p=0.003 n=10+8) AppendSpecialCase 18.7ns ± 1% 18.7ns ± 1% ~ (p=0.144 n=10+10) AppendInPlace/NoGrow/Byte 438ns ± 1% 439ns ± 1% ~ (p=0.135 n=10+8) AppendInPlace/NoGrow/1Ptr 1.05µs ± 2% 1.05µs ± 1% ~ (p=0.469 n=10+10) AppendInPlace/NoGrow/2Ptr 1.77µs ± 1% 1.78µs ± 2% ~ (p=0.469 n=10+10) AppendInPlace/NoGrow/3Ptr 1.94µs ± 1% 1.93µs ± 2% ~ (p=0.517 n=10+10) AppendInPlace/NoGrow/4Ptr 3.18µs ± 1% 3.17µs ± 0% ~ (p=0.483 n=10+9) AppendInPlace/Grow/Byte 382ns ± 2% 383ns ± 2% ~ (p=0.705 n=9+10) AppendInPlace/Grow/1Ptr 383ns ± 1% 384ns ± 1% ~ (p=0.844 n=10+10) AppendInPlace/Grow/2Ptr 459ns ± 2% 467ns ± 2% +1.74% (p=0.001 n=10+10) AppendInPlace/Grow/3Ptr 593ns ± 1% 597ns ± 2% ~ (p=0.195 n=10+10) AppendInPlace/Grow/4Ptr 583ns ± 2% 589ns ± 2% ~ (p=0.084 n=10+10) Change-Id: I629872f065a22b29267c1adbfc578aaedd36d365 Reviewed-on: https://go-review.googlesource.com/115755 Run-TryBot: Martin Möhrmann <moehrmann@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-06-01 14:30:49 +02:00
if writeBarrier.enabled {
bulkBarrierPreWrite(uintptr(p), uintptr(old.array), lenmem)
}
}
runtime: replace typedmemmmove with bulkBarrierPreWrite and memmove in growslice A bulkBarrierPreWrite together with a memmove as used in typedslicecopy is faster than a typedmemmove for each element of the old slice that needs to be copied to the new slice. typedslicecopy is not used here as runtime functions should not call other instrumented runtime functions and some conditions like dst == src or the destination slice not being large enought that are checked for in typedslicecopy can not happen in growslice. Append 13.5ns ± 6% 13.3ns ± 3% ~ (p=0.304 n=10+10) AppendGrowByte 1.18ms ± 2% 1.19ms ± 1% ~ (p=0.113 n=10+9) AppendGrowString 123ms ± 1% 73ms ± 1% -40.39% (p=0.000 n=9+8) AppendSlice/1Bytes 3.81ns ± 1% 3.78ns ± 1% ~ (p=0.116 n=10+10) AppendSlice/4Bytes 3.71ns ± 1% 3.70ns ± 0% ~ (p=0.095 n=10+9) AppendSlice/7Bytes 3.73ns ± 0% 3.75ns ± 1% ~ (p=0.442 n=10+10) AppendSlice/8Bytes 4.00ns ± 1% 4.01ns ± 1% ~ (p=0.330 n=10+10) AppendSlice/15Bytes 4.29ns ± 1% 4.28ns ± 1% ~ (p=0.536 n=10+10) AppendSlice/16Bytes 4.28ns ± 1% 4.31ns ± 1% +0.75% (p=0.019 n=10+10) AppendSlice/32Bytes 4.57ns ± 2% 4.58ns ± 2% ~ (p=0.236 n=10+10) AppendSliceLarge/1024Bytes 305ns ± 2% 306ns ± 1% ~ (p=0.236 n=10+10) AppendSliceLarge/4096Bytes 1.06µs ± 1% 1.06µs ± 0% ~ (p=1.000 n=9+10) AppendSliceLarge/16384Bytes 3.12µs ± 2% 3.11µs ± 1% ~ (p=0.493 n=10+10) AppendSliceLarge/65536Bytes 5.61µs ± 5% 5.36µs ± 2% -4.58% (p=0.003 n=10+8) AppendSliceLarge/262144Bytes 21.0µs ± 1% 19.5µs ± 1% -7.09% (p=0.000 n=8+10) AppendSliceLarge/1048576Bytes 78.4µs ± 1% 78.7µs ± 2% ~ (p=0.315 n=8+10) AppendStr/1Bytes 3.96ns ± 6% 3.99ns ± 9% ~ (p=0.591 n=10+10) AppendStr/4Bytes 3.98ns ± 1% 3.99ns ± 1% ~ (p=0.515 n=9+9) AppendStr/8Bytes 4.27ns ± 1% 4.27ns ± 1% ~ (p=0.633 n=10+10) AppendStr/16Bytes 4.56ns ± 2% 4.55ns ± 1% ~ (p=0.869 n=10+10) AppendStr/32Bytes 4.85ns ± 1% 4.89ns ± 1% +0.71% (p=0.003 n=10+8) AppendSpecialCase 18.7ns ± 1% 18.7ns ± 1% ~ (p=0.144 n=10+10) AppendInPlace/NoGrow/Byte 438ns ± 1% 439ns ± 1% ~ (p=0.135 n=10+8) AppendInPlace/NoGrow/1Ptr 1.05µs ± 2% 1.05µs ± 1% ~ (p=0.469 n=10+10) AppendInPlace/NoGrow/2Ptr 1.77µs ± 1% 1.78µs ± 2% ~ (p=0.469 n=10+10) AppendInPlace/NoGrow/3Ptr 1.94µs ± 1% 1.93µs ± 2% ~ (p=0.517 n=10+10) AppendInPlace/NoGrow/4Ptr 3.18µs ± 1% 3.17µs ± 0% ~ (p=0.483 n=10+9) AppendInPlace/Grow/Byte 382ns ± 2% 383ns ± 2% ~ (p=0.705 n=9+10) AppendInPlace/Grow/1Ptr 383ns ± 1% 384ns ± 1% ~ (p=0.844 n=10+10) AppendInPlace/Grow/2Ptr 459ns ± 2% 467ns ± 2% +1.74% (p=0.001 n=10+10) AppendInPlace/Grow/3Ptr 593ns ± 1% 597ns ± 2% ~ (p=0.195 n=10+10) AppendInPlace/Grow/4Ptr 583ns ± 2% 589ns ± 2% ~ (p=0.084 n=10+10) Change-Id: I629872f065a22b29267c1adbfc578aaedd36d365 Reviewed-on: https://go-review.googlesource.com/115755 Run-TryBot: Martin Möhrmann <moehrmann@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-06-01 14:30:49 +02:00
memmove(p, old.array, lenmem)
return slice{p, old.len, newcap}
}
func isPowerOfTwo(x uintptr) bool {
return x&(x-1) == 0
}
func slicecopy(to, fm slice, width uintptr) int {
if fm.len == 0 || to.len == 0 {
return 0
}
n := fm.len
if to.len < n {
n = to.len
}
if width == 0 {
return n
}
if raceenabled {
callerpc := getcallerpc()
pc := funcPC(slicecopy)
racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc)
racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc)
}
if msanenabled {
msanwrite(to.array, uintptr(n*int(width)))
msanread(fm.array, uintptr(n*int(width)))
}
size := uintptr(n) * width
if size == 1 { // common case worth about 2x to do here
// TODO: is this still worth it with new memmove impl?
*(*byte)(to.array) = *(*byte)(fm.array) // known to be a byte pointer
} else {
memmove(to.array, fm.array, size)
}
return n
}
func slicestringcopy(to []byte, fm string) int {
if len(fm) == 0 || len(to) == 0 {
return 0
}
n := len(fm)
if len(to) < n {
n = len(to)
}
if raceenabled {
callerpc := getcallerpc()
pc := funcPC(slicestringcopy)
racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc)
}
if msanenabled {
msanwrite(unsafe.Pointer(&to[0]), uintptr(n))
}
memmove(unsafe.Pointer(&to[0]), stringStructOf(&fm).str, uintptr(n))
return n
}