mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
strings: optimize Count for amd64
Move optimized Count implementation from bytes to runtime. Use in both bytes and strings packages. Add CountByte benchmark to strings. Strings benchmarks: name old time/op new time/op delta CountHard1-4 226µs ± 1% 226µs ± 2% ~ (p=0.247 n=10+10) CountHard2-4 316µs ± 1% 315µs ± 0% ~ (p=0.133 n=9+10) CountHard3-4 919µs ± 1% 920µs ± 1% ~ (p=0.968 n=10+9) CountTorture-4 15.4µs ± 1% 15.7µs ± 1% +2.47% (p=0.000 n=10+9) CountTortureOverlapping-4 9.60ms ± 0% 9.65ms ± 1% ~ (p=0.247 n=10+10) CountByte/10-4 26.3ns ± 1% 10.9ns ± 1% -58.71% (p=0.000 n=9+9) CountByte/32-4 42.7ns ± 0% 14.2ns ± 0% -66.64% (p=0.000 n=10+10) CountByte/4096-4 3.07µs ± 0% 0.31µs ± 2% -89.99% (p=0.000 n=9+10) CountByte/4194304-4 3.48ms ± 1% 0.34ms ± 1% -90.09% (p=0.000 n=10+9) CountByte/67108864-4 55.6ms ± 1% 7.0ms ± 0% -87.49% (p=0.000 n=9+8) name old speed new speed delta CountByte/10-4 380MB/s ± 1% 919MB/s ± 1% +142.21% (p=0.000 n=9+9) CountByte/32-4 750MB/s ± 0% 2247MB/s ± 0% +199.62% (p=0.000 n=10+10) CountByte/4096-4 1.33GB/s ± 0% 13.32GB/s ± 2% +898.13% (p=0.000 n=9+10) CountByte/4194304-4 1.21GB/s ± 1% 12.17GB/s ± 1% +908.87% (p=0.000 n=10+9) CountByte/67108864-4 1.21GB/s ± 1% 9.65GB/s ± 0% +699.29% (p=0.000 n=9+8) Fixes #19411 Change-Id: I8d2d409f0fa6df6d03b60790aa86e540b4a4e3b0 Reviewed-on: https://go-review.googlesource.com/38693 Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
03d1aa6024
commit
d206af1e6c
9 changed files with 247 additions and 194 deletions
|
|
@ -11,6 +11,7 @@ package bytes
|
||||||
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
||||||
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||||
func supportPOPCNT() bool // ../runtime/asm_$GOARCH.s
|
func supportPOPCNT() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
func countByte(s []byte, c byte) int // ../runtime/asm_$GOARCH.s
|
||||||
|
|
||||||
var shortStringLen int
|
var shortStringLen int
|
||||||
|
|
||||||
|
|
@ -95,9 +96,6 @@ func Index(s, sep []byte) int {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special case for when we must count occurrences of a single byte.
|
|
||||||
func countByte(s []byte, c byte) int
|
|
||||||
|
|
||||||
// Count counts the number of non-overlapping instances of sep in s.
|
// Count counts the number of non-overlapping instances of sep in s.
|
||||||
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
|
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
|
||||||
func Count(s, sep []byte) int {
|
func Count(s, sep []byte) int {
|
||||||
|
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
// Copyright 2017 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
#include "textflag.h"
|
|
||||||
|
|
||||||
// We use:
|
|
||||||
// SI: data
|
|
||||||
// BX: data len
|
|
||||||
// AL: byte sought
|
|
||||||
// This requires the POPCNT instruction
|
|
||||||
TEXT ·countByte(SB),NOSPLIT,$0-40
|
|
||||||
MOVQ s+0(FP), SI
|
|
||||||
MOVQ s_len+8(FP), BX
|
|
||||||
MOVB c+24(FP), AL
|
|
||||||
|
|
||||||
// Shuffle X0 around so that each byte contains
|
|
||||||
// the character we're looking for.
|
|
||||||
MOVD AX, X0
|
|
||||||
PUNPCKLBW X0, X0
|
|
||||||
PUNPCKLBW X0, X0
|
|
||||||
PSHUFL $0, X0, X0
|
|
||||||
|
|
||||||
CMPQ BX, $16
|
|
||||||
JLT small
|
|
||||||
|
|
||||||
MOVQ $0, R12 // Accumulator
|
|
||||||
|
|
||||||
MOVQ SI, DI
|
|
||||||
|
|
||||||
CMPQ BX, $32
|
|
||||||
JA avx2
|
|
||||||
sse:
|
|
||||||
LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
|
|
||||||
JMP sseloopentry
|
|
||||||
|
|
||||||
sseloop:
|
|
||||||
// Move the next 16-byte chunk of the data into X1.
|
|
||||||
MOVOU (DI), X1
|
|
||||||
// Compare bytes in X0 to X1.
|
|
||||||
PCMPEQB X0, X1
|
|
||||||
// Take the top bit of each byte in X1 and put the result in DX.
|
|
||||||
PMOVMSKB X1, DX
|
|
||||||
// Count number of matching bytes
|
|
||||||
POPCNTL DX, DX
|
|
||||||
// Accumulate into R12
|
|
||||||
ADDQ DX, R12
|
|
||||||
// Advance to next block.
|
|
||||||
ADDQ $16, DI
|
|
||||||
sseloopentry:
|
|
||||||
CMPQ DI, AX
|
|
||||||
JBE sseloop
|
|
||||||
|
|
||||||
// Get the number of bytes to consider in the last 16 bytes
|
|
||||||
ANDQ $15, BX
|
|
||||||
JZ end
|
|
||||||
|
|
||||||
// Create mask to ignore overlap between previous 16 byte block
|
|
||||||
// and the next.
|
|
||||||
MOVQ $16,CX
|
|
||||||
SUBQ BX, CX
|
|
||||||
MOVQ $0xFFFF, R10
|
|
||||||
SARQ CL, R10
|
|
||||||
SALQ CL, R10
|
|
||||||
|
|
||||||
// Process the last 16-byte chunk. This chunk may overlap with the
|
|
||||||
// chunks we've already searched so we need to mask part of it.
|
|
||||||
MOVOU (AX), X1
|
|
||||||
PCMPEQB X0, X1
|
|
||||||
PMOVMSKB X1, DX
|
|
||||||
// Apply mask
|
|
||||||
ANDQ R10, DX
|
|
||||||
POPCNTL DX, DX
|
|
||||||
ADDQ DX, R12
|
|
||||||
end:
|
|
||||||
MOVQ R12, ret+32(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// handle for lengths < 16
|
|
||||||
small:
|
|
||||||
TESTQ BX, BX
|
|
||||||
JEQ endzero
|
|
||||||
|
|
||||||
// Check if we'll load across a page boundary.
|
|
||||||
LEAQ 16(SI), AX
|
|
||||||
TESTW $0xff0, AX
|
|
||||||
JEQ endofpage
|
|
||||||
|
|
||||||
// We must ignore high bytes as they aren't part of our slice.
|
|
||||||
// Create mask.
|
|
||||||
MOVB BX, CX
|
|
||||||
MOVQ $1, R10
|
|
||||||
SALQ CL, R10
|
|
||||||
SUBQ $1, R10
|
|
||||||
|
|
||||||
// Load data
|
|
||||||
MOVOU (SI), X1
|
|
||||||
// Compare target byte with each byte in data.
|
|
||||||
PCMPEQB X0, X1
|
|
||||||
// Move result bits to integer register.
|
|
||||||
PMOVMSKB X1, DX
|
|
||||||
// Apply mask
|
|
||||||
ANDQ R10, DX
|
|
||||||
POPCNTL DX, DX
|
|
||||||
// Directly return DX, we don't need to accumulate
|
|
||||||
// since we have <16 bytes.
|
|
||||||
MOVQ DX, ret+32(FP)
|
|
||||||
RET
|
|
||||||
endzero:
|
|
||||||
MOVQ $0, ret+32(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
endofpage:
|
|
||||||
// We must ignore low bytes as they aren't part of our slice.
|
|
||||||
MOVQ $16,CX
|
|
||||||
SUBQ BX, CX
|
|
||||||
MOVQ $0xFFFF, R10
|
|
||||||
SARQ CL, R10
|
|
||||||
SALQ CL, R10
|
|
||||||
|
|
||||||
// Load data into the high end of X1.
|
|
||||||
MOVOU -16(SI)(BX*1), X1
|
|
||||||
// Compare target byte with each byte in data.
|
|
||||||
PCMPEQB X0, X1
|
|
||||||
// Move result bits to integer register.
|
|
||||||
PMOVMSKB X1, DX
|
|
||||||
// Apply mask
|
|
||||||
ANDQ R10, DX
|
|
||||||
// Directly return DX, we don't need to accumulate
|
|
||||||
// since we have <16 bytes.
|
|
||||||
POPCNTL DX, DX
|
|
||||||
MOVQ DX, ret+32(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
avx2:
|
|
||||||
CMPB runtime·support_avx2(SB), $1
|
|
||||||
JNE sse
|
|
||||||
MOVD AX, X0
|
|
||||||
LEAQ -32(SI)(BX*1), R11
|
|
||||||
VPBROADCASTB X0, Y1
|
|
||||||
avx2_loop:
|
|
||||||
VMOVDQU (DI), Y2
|
|
||||||
VPCMPEQB Y1, Y2, Y3
|
|
||||||
VPMOVMSKB Y3, DX
|
|
||||||
POPCNTL DX, DX
|
|
||||||
ADDQ DX, R12
|
|
||||||
ADDQ $32, DI
|
|
||||||
CMPQ DI, R11
|
|
||||||
JLE avx2_loop
|
|
||||||
|
|
||||||
// If last block is already processed,
|
|
||||||
// skip to the end.
|
|
||||||
CMPQ DI, R11
|
|
||||||
JEQ endavx
|
|
||||||
|
|
||||||
// Load address of the last 32 bytes.
|
|
||||||
// There is an overlap with the previous block.
|
|
||||||
MOVQ R11, DI
|
|
||||||
VMOVDQU (DI), Y2
|
|
||||||
VPCMPEQB Y1, Y2, Y3
|
|
||||||
VPMOVMSKB Y3, DX
|
|
||||||
// Exit AVX mode.
|
|
||||||
VZEROUPPER
|
|
||||||
|
|
||||||
// Create mask to ignore overlap between previous 32 byte block
|
|
||||||
// and the next.
|
|
||||||
ANDQ $31, BX
|
|
||||||
MOVQ $32,CX
|
|
||||||
SUBQ BX, CX
|
|
||||||
MOVQ $0xFFFFFFFF, R10
|
|
||||||
SARQ CL, R10
|
|
||||||
SALQ CL, R10
|
|
||||||
// Apply mask
|
|
||||||
ANDQ R10, DX
|
|
||||||
POPCNTL DX, DX
|
|
||||||
ADDQ DX, R12
|
|
||||||
MOVQ R12, ret+32(FP)
|
|
||||||
RET
|
|
||||||
endavx:
|
|
||||||
// Exit AVX mode.
|
|
||||||
VZEROUPPER
|
|
||||||
MOVQ R12, ret+32(FP)
|
|
||||||
RET
|
|
||||||
|
|
@ -17,7 +17,10 @@ runtime/asm_amd64.s: [GOARCH] cannot check cross-package assembly function: Comp
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package strings
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package strings
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package bytes
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package bytes
|
||||||
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportPOPCNT is in package strings
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportPOPCNT is in package bytes
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportPOPCNT is in package bytes
|
||||||
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: countByte is in package strings
|
||||||
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: countByte is in package bytes
|
||||||
|
|
||||||
// Intentionally missing declarations. These are special assembly routines.
|
// Intentionally missing declarations. These are special assembly routines.
|
||||||
// Some are jumped into from other routines, with values in specific registers.
|
// Some are jumped into from other routines, with values in specific registers.
|
||||||
|
|
|
||||||
|
|
@ -1702,6 +1702,11 @@ TEXT bytes·supportAVX2(SB),NOSPLIT,$0-1
|
||||||
MOVB AX, ret+0(FP)
|
MOVB AX, ret+0(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT strings·supportPOPCNT(SB),NOSPLIT,$0-1
|
||||||
|
MOVBLZX runtime·support_popcnt(SB), AX
|
||||||
|
MOVB AX, ret+0(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
TEXT bytes·supportPOPCNT(SB),NOSPLIT,$0-1
|
TEXT bytes·supportPOPCNT(SB),NOSPLIT,$0-1
|
||||||
MOVBLZX runtime·support_popcnt(SB), AX
|
MOVBLZX runtime·support_popcnt(SB), AX
|
||||||
MOVB AX, ret+0(FP)
|
MOVB AX, ret+0(FP)
|
||||||
|
|
@ -2131,6 +2136,196 @@ eqret:
|
||||||
MOVB $0, ret+48(FP)
|
MOVB $0, ret+48(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
|
||||||
|
TEXT bytes·countByte(SB),NOSPLIT,$0-40
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), BX
|
||||||
|
MOVB c+24(FP), AL
|
||||||
|
LEAQ ret+32(FP), R8
|
||||||
|
JMP runtime·countByte(SB)
|
||||||
|
|
||||||
|
TEXT strings·countByte(SB),NOSPLIT,$0-32
|
||||||
|
MOVQ s+0(FP), SI
|
||||||
|
MOVQ s_len+8(FP), BX
|
||||||
|
MOVB c+16(FP), AL
|
||||||
|
LEAQ ret+24(FP), R8
|
||||||
|
JMP runtime·countByte(SB)
|
||||||
|
|
||||||
|
// input:
|
||||||
|
// SI: data
|
||||||
|
// BX: data len
|
||||||
|
// AL: byte sought
|
||||||
|
// R8: address to put result
|
||||||
|
// This requires the POPCNT instruction
|
||||||
|
TEXT runtime·countByte(SB),NOSPLIT,$0
|
||||||
|
// Shuffle X0 around so that each byte contains
|
||||||
|
// the character we're looking for.
|
||||||
|
MOVD AX, X0
|
||||||
|
PUNPCKLBW X0, X0
|
||||||
|
PUNPCKLBW X0, X0
|
||||||
|
PSHUFL $0, X0, X0
|
||||||
|
|
||||||
|
CMPQ BX, $16
|
||||||
|
JLT small
|
||||||
|
|
||||||
|
MOVQ $0, R12 // Accumulator
|
||||||
|
|
||||||
|
MOVQ SI, DI
|
||||||
|
|
||||||
|
CMPQ BX, $32
|
||||||
|
JA avx2
|
||||||
|
sse:
|
||||||
|
LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
|
||||||
|
JMP sseloopentry
|
||||||
|
|
||||||
|
sseloop:
|
||||||
|
// Move the next 16-byte chunk of the data into X1.
|
||||||
|
MOVOU (DI), X1
|
||||||
|
// Compare bytes in X0 to X1.
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
// Take the top bit of each byte in X1 and put the result in DX.
|
||||||
|
PMOVMSKB X1, DX
|
||||||
|
// Count number of matching bytes
|
||||||
|
POPCNTL DX, DX
|
||||||
|
// Accumulate into R12
|
||||||
|
ADDQ DX, R12
|
||||||
|
// Advance to next block.
|
||||||
|
ADDQ $16, DI
|
||||||
|
sseloopentry:
|
||||||
|
CMPQ DI, AX
|
||||||
|
JBE sseloop
|
||||||
|
|
||||||
|
// Get the number of bytes to consider in the last 16 bytes
|
||||||
|
ANDQ $15, BX
|
||||||
|
JZ end
|
||||||
|
|
||||||
|
// Create mask to ignore overlap between previous 16 byte block
|
||||||
|
// and the next.
|
||||||
|
MOVQ $16,CX
|
||||||
|
SUBQ BX, CX
|
||||||
|
MOVQ $0xFFFF, R10
|
||||||
|
SARQ CL, R10
|
||||||
|
SALQ CL, R10
|
||||||
|
|
||||||
|
// Process the last 16-byte chunk. This chunk may overlap with the
|
||||||
|
// chunks we've already searched so we need to mask part of it.
|
||||||
|
MOVOU (AX), X1
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
PMOVMSKB X1, DX
|
||||||
|
// Apply mask
|
||||||
|
ANDQ R10, DX
|
||||||
|
POPCNTL DX, DX
|
||||||
|
ADDQ DX, R12
|
||||||
|
end:
|
||||||
|
MOVQ R12, (R8)
|
||||||
|
RET
|
||||||
|
|
||||||
|
// handle for lengths < 16
|
||||||
|
small:
|
||||||
|
TESTQ BX, BX
|
||||||
|
JEQ endzero
|
||||||
|
|
||||||
|
// Check if we'll load across a page boundary.
|
||||||
|
LEAQ 16(SI), AX
|
||||||
|
TESTW $0xff0, AX
|
||||||
|
JEQ endofpage
|
||||||
|
|
||||||
|
// We must ignore high bytes as they aren't part of our slice.
|
||||||
|
// Create mask.
|
||||||
|
MOVB BX, CX
|
||||||
|
MOVQ $1, R10
|
||||||
|
SALQ CL, R10
|
||||||
|
SUBQ $1, R10
|
||||||
|
|
||||||
|
// Load data
|
||||||
|
MOVOU (SI), X1
|
||||||
|
// Compare target byte with each byte in data.
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
// Move result bits to integer register.
|
||||||
|
PMOVMSKB X1, DX
|
||||||
|
// Apply mask
|
||||||
|
ANDQ R10, DX
|
||||||
|
POPCNTL DX, DX
|
||||||
|
// Directly return DX, we don't need to accumulate
|
||||||
|
// since we have <16 bytes.
|
||||||
|
MOVQ DX, (R8)
|
||||||
|
RET
|
||||||
|
endzero:
|
||||||
|
MOVQ $0, (R8)
|
||||||
|
RET
|
||||||
|
|
||||||
|
endofpage:
|
||||||
|
// We must ignore low bytes as they aren't part of our slice.
|
||||||
|
MOVQ $16,CX
|
||||||
|
SUBQ BX, CX
|
||||||
|
MOVQ $0xFFFF, R10
|
||||||
|
SARQ CL, R10
|
||||||
|
SALQ CL, R10
|
||||||
|
|
||||||
|
// Load data into the high end of X1.
|
||||||
|
MOVOU -16(SI)(BX*1), X1
|
||||||
|
// Compare target byte with each byte in data.
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
// Move result bits to integer register.
|
||||||
|
PMOVMSKB X1, DX
|
||||||
|
// Apply mask
|
||||||
|
ANDQ R10, DX
|
||||||
|
// Directly return DX, we don't need to accumulate
|
||||||
|
// since we have <16 bytes.
|
||||||
|
POPCNTL DX, DX
|
||||||
|
MOVQ DX, (R8)
|
||||||
|
RET
|
||||||
|
|
||||||
|
avx2:
|
||||||
|
CMPB runtime·support_avx2(SB), $1
|
||||||
|
JNE sse
|
||||||
|
MOVD AX, X0
|
||||||
|
LEAQ -32(SI)(BX*1), R11
|
||||||
|
VPBROADCASTB X0, Y1
|
||||||
|
avx2_loop:
|
||||||
|
VMOVDQU (DI), Y2
|
||||||
|
VPCMPEQB Y1, Y2, Y3
|
||||||
|
VPMOVMSKB Y3, DX
|
||||||
|
POPCNTL DX, DX
|
||||||
|
ADDQ DX, R12
|
||||||
|
ADDQ $32, DI
|
||||||
|
CMPQ DI, R11
|
||||||
|
JLE avx2_loop
|
||||||
|
|
||||||
|
// If last block is already processed,
|
||||||
|
// skip to the end.
|
||||||
|
CMPQ DI, R11
|
||||||
|
JEQ endavx
|
||||||
|
|
||||||
|
// Load address of the last 32 bytes.
|
||||||
|
// There is an overlap with the previous block.
|
||||||
|
MOVQ R11, DI
|
||||||
|
VMOVDQU (DI), Y2
|
||||||
|
VPCMPEQB Y1, Y2, Y3
|
||||||
|
VPMOVMSKB Y3, DX
|
||||||
|
// Exit AVX mode.
|
||||||
|
VZEROUPPER
|
||||||
|
|
||||||
|
// Create mask to ignore overlap between previous 32 byte block
|
||||||
|
// and the next.
|
||||||
|
ANDQ $31, BX
|
||||||
|
MOVQ $32,CX
|
||||||
|
SUBQ BX, CX
|
||||||
|
MOVQ $0xFFFFFFFF, R10
|
||||||
|
SARQ CL, R10
|
||||||
|
SALQ CL, R10
|
||||||
|
// Apply mask
|
||||||
|
ANDQ R10, DX
|
||||||
|
POPCNTL DX, DX
|
||||||
|
ADDQ DX, R12
|
||||||
|
MOVQ R12, (R8)
|
||||||
|
RET
|
||||||
|
endavx:
|
||||||
|
// Exit AVX mode.
|
||||||
|
VZEROUPPER
|
||||||
|
MOVQ R12, (R8)
|
||||||
|
RET
|
||||||
|
|
||||||
TEXT runtime·return0(SB), NOSPLIT, $0
|
TEXT runtime·return0(SB), NOSPLIT, $0
|
||||||
MOVL $0, AX
|
MOVL $0, AX
|
||||||
RET
|
RET
|
||||||
|
|
|
||||||
|
|
@ -72,9 +72,8 @@ func hashStrRev(sep string) (uint32, uint32) {
|
||||||
return hash, pow
|
return hash, pow
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count counts the number of non-overlapping instances of substr in s.
|
// countGeneric implements Count.
|
||||||
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
|
func countGeneric(s, substr string) int {
|
||||||
func Count(s, substr string) int {
|
|
||||||
// special case
|
// special case
|
||||||
if len(substr) == 0 {
|
if len(substr) == 0 {
|
||||||
return utf8.RuneCountInString(s) + 1
|
return utf8.RuneCountInString(s) + 1
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ package strings
|
||||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
||||||
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
|
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
|
||||||
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
func supportPOPCNT() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
func countByte(s string, c byte) int // ../runtime/asm_$GOARCH.s
|
||||||
|
|
||||||
var shortStringLen int
|
var shortStringLen int
|
||||||
|
|
||||||
|
|
@ -93,3 +95,12 @@ func Index(s, substr string) int {
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count counts the number of non-overlapping instances of substr in s.
|
||||||
|
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
|
||||||
|
func Count(s, substr string) int {
|
||||||
|
if len(substr) == 1 && supportPOPCNT() {
|
||||||
|
return countByte(s, byte(substr[0]))
|
||||||
|
}
|
||||||
|
return countGeneric(s, substr)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,3 +45,9 @@ func Index(s, substr string) int {
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count counts the number of non-overlapping instances of substr in s.
|
||||||
|
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
|
||||||
|
func Count(s, substr string) int {
|
||||||
|
return countGeneric(s, substr)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -96,3 +96,9 @@ func Index(s, substr string) int {
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count counts the number of non-overlapping instances of substr in s.
|
||||||
|
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
|
||||||
|
func Count(s, substr string) int {
|
||||||
|
return countGeneric(s, substr)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1457,6 +1457,24 @@ func BenchmarkCountTortureOverlapping(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkCountByte(b *testing.B) {
|
||||||
|
indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
|
||||||
|
benchStr := Repeat(benchmarkString,
|
||||||
|
(indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
|
||||||
|
benchFunc := func(b *testing.B, benchStr string) {
|
||||||
|
b.SetBytes(int64(len(benchStr)))
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
Count(benchStr, "=")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, size := range indexSizes {
|
||||||
|
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
|
||||||
|
benchFunc(b, benchStr[:size])
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
var makeFieldsInput = func() string {
|
var makeFieldsInput = func() string {
|
||||||
x := make([]byte, 1<<20)
|
x := make([]byte, 1<<20)
|
||||||
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
|
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue