mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
strings: use AVX2 for Index if available
IndexHard4-4 1.50ms ± 2% 0.71ms ± 0% -52.36% (p=0.000 n=20+19) This also fixes a bug, that caused a string of length 16 to use two 8-byte comparisons instead of one 16-byte. And adds a test for cases when partial_match fails. Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0 Reviewed-on: https://go-review.googlesource.com/22551 Run-TryBot: Ilya Tocar <ilya.tocar@intel.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
83c73a85db
commit
0cff219c12
4 changed files with 99 additions and 5 deletions
|
|
@ -9,7 +9,17 @@ package bytes
|
|||
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
||||
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
||||
const shortStringLen = 31
|
||||
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||
|
||||
var shortStringLen int
|
||||
|
||||
func init() {
|
||||
if supportAVX2() {
|
||||
shortStringLen = 63
|
||||
} else {
|
||||
shortStringLen = 31
|
||||
}
|
||||
}
|
||||
|
||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||
func Index(s, sep []byte) int {
|
||||
|
|
|
|||
|
|
@ -1695,6 +1695,16 @@ big_loop_avx2_exit:
|
|||
JMP loop
|
||||
|
||||
|
||||
TEXT strings·supportAVX2(SB),NOSPLIT,$0-1
|
||||
MOVBLZX runtime·support_avx2(SB), AX
|
||||
MOVB AX, ret+0(FP)
|
||||
RET
|
||||
|
||||
TEXT bytes·supportAVX2(SB),NOSPLIT,$0-1
|
||||
MOVBLZX runtime·support_avx2(SB), AX
|
||||
MOVB AX, ret+0(FP)
|
||||
RET
|
||||
|
||||
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
|
||||
MOVQ s+0(FP), DI
|
||||
// We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
||||
|
|
@ -1809,7 +1819,7 @@ loop8:
|
|||
JB loop8
|
||||
JMP fail
|
||||
_9_or_more:
|
||||
CMPQ AX, $16
|
||||
CMPQ AX, $15
|
||||
JA _16_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
|
|
@ -1833,7 +1843,7 @@ partial_success9to15:
|
|||
JMP fail
|
||||
_16_or_more:
|
||||
CMPQ AX, $16
|
||||
JA _17_to_31
|
||||
JA _17_or_more
|
||||
MOVOU (BP), X1
|
||||
LEAQ -15(DI)(DX*1), DX
|
||||
loop16:
|
||||
|
|
@ -1846,7 +1856,9 @@ loop16:
|
|||
CMPQ DI,DX
|
||||
JB loop16
|
||||
JMP fail
|
||||
_17_to_31:
|
||||
_17_or_more:
|
||||
CMPQ AX, $31
|
||||
JA _32_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVOU -16(BP)(AX*1), X0
|
||||
|
|
@ -1870,9 +1882,56 @@ partial_success17to31:
|
|||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop17to31
|
||||
JMP fail
|
||||
// We can get here only when AVX2 is enabled and cutoff for indexShortStr is set to 63
|
||||
// So no need to check cpuid
|
||||
_32_or_more:
|
||||
CMPQ AX, $32
|
||||
JA _33_to_63
|
||||
VMOVDQU (BP), Y1
|
||||
LEAQ -31(DI)(DX*1), DX
|
||||
loop32:
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPMOVMSKB Y3, SI
|
||||
CMPL SI, $0xffffffff
|
||||
JE success_avx2
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop32
|
||||
JMP fail_avx2
|
||||
_33_to_63:
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
VMOVDQU -32(BP)(AX*1), Y0
|
||||
VMOVDQU (BP), Y1
|
||||
loop33to63:
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPMOVMSKB Y3, SI
|
||||
CMPL SI, $0xffffffff
|
||||
JE partial_success33to63
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop33to63
|
||||
JMP fail_avx2
|
||||
partial_success33to63:
|
||||
VMOVDQU -32(AX)(DI*1), Y3
|
||||
VPCMPEQB Y0, Y3, Y4
|
||||
VPMOVMSKB Y4, SI
|
||||
CMPL SI, $0xffffffff
|
||||
JE success_avx2
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop33to63
|
||||
fail_avx2:
|
||||
VZEROUPPER
|
||||
fail:
|
||||
MOVQ $-1, (R11)
|
||||
RET
|
||||
success_avx2:
|
||||
VZEROUPPER
|
||||
JMP success
|
||||
sse42:
|
||||
MOVL runtime·cpuid_ecx(SB), CX
|
||||
ANDL $0x100000, CX
|
||||
|
|
|
|||
|
|
@ -9,7 +9,17 @@ package strings
|
|||
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
||||
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
|
||||
const shortStringLen = 31
|
||||
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||
|
||||
var shortStringLen int
|
||||
|
||||
func init() {
|
||||
if supportAVX2() {
|
||||
shortStringLen = 63
|
||||
} else {
|
||||
shortStringLen = 31
|
||||
}
|
||||
}
|
||||
|
||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||
func Index(s, sep string) int {
|
||||
|
|
|
|||
|
|
@ -86,32 +86,44 @@ var indexTests = []IndexTest{
|
|||
{"32145678", "01234567", -1},
|
||||
{"01234567", "01234567", 0},
|
||||
{"x01234567", "01234567", 1},
|
||||
{"x0123456x01234567", "01234567", 9},
|
||||
{"xx01234567"[:9], "01234567", -1},
|
||||
{"", "0123456789", -1},
|
||||
{"3214567844", "0123456789", -1},
|
||||
{"0123456789", "0123456789", 0},
|
||||
{"x0123456789", "0123456789", 1},
|
||||
{"x012345678x0123456789", "0123456789", 11},
|
||||
{"xyz0123456789"[:12], "0123456789", -1},
|
||||
{"x01234567x89", "0123456789", -1},
|
||||
{"", "0123456789012345", -1},
|
||||
{"3214567889012345", "0123456789012345", -1},
|
||||
{"0123456789012345", "0123456789012345", 0},
|
||||
{"x0123456789012345", "0123456789012345", 1},
|
||||
{"x012345678901234x0123456789012345", "0123456789012345", 17},
|
||||
{"", "01234567890123456789", -1},
|
||||
{"32145678890123456789", "01234567890123456789", -1},
|
||||
{"01234567890123456789", "01234567890123456789", 0},
|
||||
{"x01234567890123456789", "01234567890123456789", 1},
|
||||
{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
|
||||
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
|
||||
{"", "0123456789012345678901234567890", -1},
|
||||
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
|
||||
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
|
||||
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
|
||||
{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
|
||||
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
|
||||
{"", "01234567890123456789012345678901", -1},
|
||||
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
|
||||
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
|
||||
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
|
||||
{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
|
||||
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
|
||||
{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
|
||||
{"", "0123456789012345678901234567890123456789", -1},
|
||||
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
|
||||
{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
|
||||
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
|
||||
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
|
||||
}
|
||||
|
||||
var lastIndexTests = []IndexTest{
|
||||
|
|
@ -1315,6 +1327,9 @@ func benchmarkCountHard(b *testing.B, sep string) {
|
|||
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
|
||||
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
|
||||
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
|
||||
func BenchmarkIndexHard4(b *testing.B) {
|
||||
benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
|
||||
}
|
||||
|
||||
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
|
||||
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue