mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
strings: use AVX2 for Index if available
IndexHard4-4 1.50ms ± 2% 0.71ms ± 0% -52.36% (p=0.000 n=20+19) This also fixes a bug that caused a string of length 16 to use two 8-byte comparisons instead of one 16-byte comparison. It also adds a test for cases where partial_match fails. Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0 Reviewed-on: https://go-review.googlesource.com/22551 Run-TryBot: Ilya Tocar <ilya.tocar@intel.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
83c73a85db
commit
0cff219c12
4 changed files with 99 additions and 5 deletions
|
|
@ -9,7 +9,17 @@ package bytes
|
||||||
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
||||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
||||||
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
||||||
const shortStringLen = 31
|
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
|
||||||
|
var shortStringLen int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if supportAVX2() {
|
||||||
|
shortStringLen = 63
|
||||||
|
} else {
|
||||||
|
shortStringLen = 31
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||||
func Index(s, sep []byte) int {
|
func Index(s, sep []byte) int {
|
||||||
|
|
|
||||||
|
|
@ -1695,6 +1695,16 @@ big_loop_avx2_exit:
|
||||||
JMP loop
|
JMP loop
|
||||||
|
|
||||||
|
|
||||||
|
TEXT strings·supportAVX2(SB),NOSPLIT,$0-1
|
||||||
|
MOVBLZX runtime·support_avx2(SB), AX
|
||||||
|
MOVB AX, ret+0(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT bytes·supportAVX2(SB),NOSPLIT,$0-1
|
||||||
|
MOVBLZX runtime·support_avx2(SB), AX
|
||||||
|
MOVB AX, ret+0(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
|
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
|
||||||
MOVQ s+0(FP), DI
|
MOVQ s+0(FP), DI
|
||||||
// We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
// We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
||||||
|
|
@ -1809,7 +1819,7 @@ loop8:
|
||||||
JB loop8
|
JB loop8
|
||||||
JMP fail
|
JMP fail
|
||||||
_9_or_more:
|
_9_or_more:
|
||||||
CMPQ AX, $16
|
CMPQ AX, $15
|
||||||
JA _16_or_more
|
JA _16_or_more
|
||||||
LEAQ 1(DI)(DX*1), DX
|
LEAQ 1(DI)(DX*1), DX
|
||||||
SUBQ AX, DX
|
SUBQ AX, DX
|
||||||
|
|
@ -1833,7 +1843,7 @@ partial_success9to15:
|
||||||
JMP fail
|
JMP fail
|
||||||
_16_or_more:
|
_16_or_more:
|
||||||
CMPQ AX, $16
|
CMPQ AX, $16
|
||||||
JA _17_to_31
|
JA _17_or_more
|
||||||
MOVOU (BP), X1
|
MOVOU (BP), X1
|
||||||
LEAQ -15(DI)(DX*1), DX
|
LEAQ -15(DI)(DX*1), DX
|
||||||
loop16:
|
loop16:
|
||||||
|
|
@ -1846,7 +1856,9 @@ loop16:
|
||||||
CMPQ DI,DX
|
CMPQ DI,DX
|
||||||
JB loop16
|
JB loop16
|
||||||
JMP fail
|
JMP fail
|
||||||
_17_to_31:
|
_17_or_more:
|
||||||
|
CMPQ AX, $31
|
||||||
|
JA _32_or_more
|
||||||
LEAQ 1(DI)(DX*1), DX
|
LEAQ 1(DI)(DX*1), DX
|
||||||
SUBQ AX, DX
|
SUBQ AX, DX
|
||||||
MOVOU -16(BP)(AX*1), X0
|
MOVOU -16(BP)(AX*1), X0
|
||||||
|
|
@ -1870,9 +1882,56 @@ partial_success17to31:
|
||||||
ADDQ $1,DI
|
ADDQ $1,DI
|
||||||
CMPQ DI,DX
|
CMPQ DI,DX
|
||||||
JB loop17to31
|
JB loop17to31
|
||||||
|
JMP fail
|
||||||
|
// We can get here only when AVX2 is enabled and cutoff for indexShortStr is set to 63
|
||||||
|
// So no need to check cpuid
|
||||||
|
_32_or_more:
|
||||||
|
CMPQ AX, $32
|
||||||
|
JA _33_to_63
|
||||||
|
VMOVDQU (BP), Y1
|
||||||
|
LEAQ -31(DI)(DX*1), DX
|
||||||
|
loop32:
|
||||||
|
VMOVDQU (DI), Y2
|
||||||
|
VPCMPEQB Y1, Y2, Y3
|
||||||
|
VPMOVMSKB Y3, SI
|
||||||
|
CMPL SI, $0xffffffff
|
||||||
|
JE success_avx2
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop32
|
||||||
|
JMP fail_avx2
|
||||||
|
_33_to_63:
|
||||||
|
LEAQ 1(DI)(DX*1), DX
|
||||||
|
SUBQ AX, DX
|
||||||
|
VMOVDQU -32(BP)(AX*1), Y0
|
||||||
|
VMOVDQU (BP), Y1
|
||||||
|
loop33to63:
|
||||||
|
VMOVDQU (DI), Y2
|
||||||
|
VPCMPEQB Y1, Y2, Y3
|
||||||
|
VPMOVMSKB Y3, SI
|
||||||
|
CMPL SI, $0xffffffff
|
||||||
|
JE partial_success33to63
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop33to63
|
||||||
|
JMP fail_avx2
|
||||||
|
partial_success33to63:
|
||||||
|
VMOVDQU -32(AX)(DI*1), Y3
|
||||||
|
VPCMPEQB Y0, Y3, Y4
|
||||||
|
VPMOVMSKB Y4, SI
|
||||||
|
CMPL SI, $0xffffffff
|
||||||
|
JE success_avx2
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop33to63
|
||||||
|
fail_avx2:
|
||||||
|
VZEROUPPER
|
||||||
fail:
|
fail:
|
||||||
MOVQ $-1, (R11)
|
MOVQ $-1, (R11)
|
||||||
RET
|
RET
|
||||||
|
success_avx2:
|
||||||
|
VZEROUPPER
|
||||||
|
JMP success
|
||||||
sse42:
|
sse42:
|
||||||
MOVL runtime·cpuid_ecx(SB), CX
|
MOVL runtime·cpuid_ecx(SB), CX
|
||||||
ANDL $0x100000, CX
|
ANDL $0x100000, CX
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,17 @@ package strings
|
||||||
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
||||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
||||||
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
|
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
|
||||||
const shortStringLen = 31
|
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
|
||||||
|
var shortStringLen int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if supportAVX2() {
|
||||||
|
shortStringLen = 63
|
||||||
|
} else {
|
||||||
|
shortStringLen = 31
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||||
func Index(s, sep string) int {
|
func Index(s, sep string) int {
|
||||||
|
|
|
||||||
|
|
@ -86,32 +86,44 @@ var indexTests = []IndexTest{
|
||||||
{"32145678", "01234567", -1},
|
{"32145678", "01234567", -1},
|
||||||
{"01234567", "01234567", 0},
|
{"01234567", "01234567", 0},
|
||||||
{"x01234567", "01234567", 1},
|
{"x01234567", "01234567", 1},
|
||||||
|
{"x0123456x01234567", "01234567", 9},
|
||||||
{"xx01234567"[:9], "01234567", -1},
|
{"xx01234567"[:9], "01234567", -1},
|
||||||
{"", "0123456789", -1},
|
{"", "0123456789", -1},
|
||||||
{"3214567844", "0123456789", -1},
|
{"3214567844", "0123456789", -1},
|
||||||
{"0123456789", "0123456789", 0},
|
{"0123456789", "0123456789", 0},
|
||||||
{"x0123456789", "0123456789", 1},
|
{"x0123456789", "0123456789", 1},
|
||||||
|
{"x012345678x0123456789", "0123456789", 11},
|
||||||
{"xyz0123456789"[:12], "0123456789", -1},
|
{"xyz0123456789"[:12], "0123456789", -1},
|
||||||
{"x01234567x89", "0123456789", -1},
|
{"x01234567x89", "0123456789", -1},
|
||||||
{"", "0123456789012345", -1},
|
{"", "0123456789012345", -1},
|
||||||
{"3214567889012345", "0123456789012345", -1},
|
{"3214567889012345", "0123456789012345", -1},
|
||||||
{"0123456789012345", "0123456789012345", 0},
|
{"0123456789012345", "0123456789012345", 0},
|
||||||
{"x0123456789012345", "0123456789012345", 1},
|
{"x0123456789012345", "0123456789012345", 1},
|
||||||
|
{"x012345678901234x0123456789012345", "0123456789012345", 17},
|
||||||
{"", "01234567890123456789", -1},
|
{"", "01234567890123456789", -1},
|
||||||
{"32145678890123456789", "01234567890123456789", -1},
|
{"32145678890123456789", "01234567890123456789", -1},
|
||||||
{"01234567890123456789", "01234567890123456789", 0},
|
{"01234567890123456789", "01234567890123456789", 0},
|
||||||
{"x01234567890123456789", "01234567890123456789", 1},
|
{"x01234567890123456789", "01234567890123456789", 1},
|
||||||
|
{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
|
||||||
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
|
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
|
||||||
{"", "0123456789012345678901234567890", -1},
|
{"", "0123456789012345678901234567890", -1},
|
||||||
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
|
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
|
||||||
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
|
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
|
||||||
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
|
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
|
||||||
|
{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
|
||||||
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
|
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
|
||||||
{"", "01234567890123456789012345678901", -1},
|
{"", "01234567890123456789012345678901", -1},
|
||||||
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
|
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
|
||||||
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
|
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
|
||||||
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
|
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
|
||||||
|
{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
|
||||||
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
|
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
|
||||||
|
{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
|
||||||
|
{"", "0123456789012345678901234567890123456789", -1},
|
||||||
|
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
|
||||||
|
{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
|
||||||
|
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
|
||||||
|
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
|
||||||
}
|
}
|
||||||
|
|
||||||
var lastIndexTests = []IndexTest{
|
var lastIndexTests = []IndexTest{
|
||||||
|
|
@ -1315,6 +1327,9 @@ func benchmarkCountHard(b *testing.B, sep string) {
|
||||||
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
|
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
|
||||||
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
|
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
|
||||||
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
|
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
|
||||||
|
func BenchmarkIndexHard4(b *testing.B) {
|
||||||
|
benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
|
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
|
||||||
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
|
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue