mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
strings: add asm version of Index() for short strings on amd64
Currently we have a special case for 1-byte strings. This change extends it to strings shorter than 32 bytes on amd64.

Results (broadwell):

name                  old time/op  new time/op  delta
IndexRune-4           57.4ns ± 0%  57.5ns ± 0%   +0.10%  (p=0.000 n=20+19)
IndexRuneFastPath-4   20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4               21.0ns ± 0%  21.8ns ± 0%   +3.81%  (p=0.000 n=20+20)
LastIndex-4           7.07ns ± 1%  6.98ns ± 0%   -1.21%  (p=0.000 n=20+16)
IndexByte-4           18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4          1.46ms ± 0%  0.39ms ± 0%  -73.06%  (p=0.000 n=16+16)
IndexHard2-4          1.46ms ± 0%  0.30ms ± 0%  -79.55%  (p=0.000 n=18+18)
IndexHard3-4          1.46ms ± 0%  0.66ms ± 0%  -54.68%  (p=0.000 n=19+19)
LastIndexHard1-4      1.46ms ± 0%  1.46ms ± 0%   -0.01%  (p=0.036 n=18+20)
LastIndexHard2-4      1.46ms ± 0%  1.46ms ± 0%     ~     (p=0.588 n=19+19)
LastIndexHard3-4      1.46ms ± 0%  1.46ms ± 0%     ~     (p=0.283 n=17+20)
IndexTorture-4        11.1µs ± 0%  11.1µs ± 0%   +0.01%  (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
This commit is contained in:
parent
1870572180
commit
95333aea53
5 changed files with 311 additions and 37 deletions
|
|
@ -1725,6 +1725,168 @@ big_loop_avx2_exit:
|
||||||
JMP loop
|
JMP loop
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Also use this in bytes.Index
|
||||||
|
// func indexShortStr(s, c string) int
//
// Stores in ret the byte offset of the first occurrence of c in s, or -1 if
// c does not occur in s. The Go declaration documents the precondition
// 2 <= len(c) <= shortStringLen (31); shorter separators are not
// distinguished from the 2-byte case here and must not be passed in.
//
// Strategy: dispatch on len(c) and slide a one-byte-step window over s.
//   - len(c) of 2, 4, 8 or 16 is compared with a single load per position.
//   - Every other length is compared with two overlapping loads: the first
//     k bytes and the last k bytes of the window (k = 2, 4, 8 or 16), which
//     together cover all of c.
//
// Register use after the prologue:
//   DI     current candidate start in s
//   CX     one past the last candidate start (s + len(s) - len(c) + 1)
//   BX     len(c)
//   AX     leading bytes of c (scalar paths); pointer to c before that
//   DX     trailing bytes of c (scalar two-load paths)
//   X1/X0  leading / trailing 16 bytes of c (SSE paths)
//   SI     bytes loaded from s, or the PMOVMSKB result
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
	MOVQ s+0(FP), DI
	MOVQ s_len+8(FP), CX
	MOVQ c+16(FP), AX
	MOVQ c_len+24(FP), BX
	// A separator longer than s can never match; this also guarantees that
	// at least one candidate position exists for the do-while loops below.
	CMPQ BX, CX
	JA fail
	CMPQ BX, $2
	JA _3_or_more
	// len(c) == 2: compare one 16-bit word per position.
	// MOVW leaves the upper bits of AX untouched; CMPW only looks at the low 16.
	MOVW (AX), AX
	LEAQ -1(DI)(CX*1), CX
loop2:
	MOVW (DI), SI
	CMPW SI,AX
	JZ success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop2
	JMP fail
_3_or_more:
	CMPQ BX, $3
	JA _4_or_more
	// len(c) == 3: bytes 0-1 in AX, bytes 1-2 in DX (overlapping words).
	MOVW 1(AX), DX
	MOVW (AX), AX
	LEAQ -2(DI)(CX*1), CX
loop3:
	MOVW (DI), SI
	CMPW SI,AX
	JZ partial_success3
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop3
	JMP fail
partial_success3:
	// The first two bytes matched; verify the last two.
	MOVW 1(DI), SI
	CMPW SI,DX
	JZ success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop3
	JMP fail
_4_or_more:
	CMPQ BX, $4
	JA _5_or_more
	// len(c) == 4: compare one 32-bit word per position.
	MOVL (AX), AX
	LEAQ -3(DI)(CX*1), CX
loop4:
	MOVL (DI), SI
	CMPL SI,AX
	JZ success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop4
	JMP fail
_5_or_more:
	CMPQ BX, $7
	JA _8_or_more
	// len(c) in 5..7: first 4 bytes in AX, last 4 bytes in DX.
	LEAQ 1(DI)(CX*1), CX
	SUBQ BX, CX
	MOVL -4(AX)(BX*1), DX
	MOVL (AX), AX
loop5to7:
	MOVL (DI), SI
	CMPL SI,AX
	JZ partial_success5to7
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop5to7
	JMP fail
partial_success5to7:
	// The first 4 bytes matched; verify the last 4 bytes of the window.
	MOVL -4(BX)(DI*1), SI
	CMPL SI,DX
	JZ success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop5to7
	JMP fail
_8_or_more:
	CMPQ BX, $8
	JA _9_or_more
	// len(c) == 8: compare one 64-bit word per position.
	MOVQ (AX), AX
	LEAQ -7(DI)(CX*1), CX
loop8:
	MOVQ (DI), SI
	CMPQ SI,AX
	JZ success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop8
	JMP fail
_9_or_more:
	// Only lengths 9..15 use the two-load path. Comparing against 16 here
	// would route 16-byte separators through it as well and leave the
	// single-load loop16 path below unreachable.
	CMPQ BX, $15
	JA _16_or_more
	// len(c) in 9..15: first 8 bytes in AX, last 8 bytes in DX.
	LEAQ 1(DI)(CX*1), CX
	SUBQ BX, CX
	MOVQ -8(AX)(BX*1), DX
	MOVQ (AX), AX
loop9to15:
	MOVQ (DI), SI
	CMPQ SI,AX
	JZ partial_success9to15
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop9to15
	JMP fail
partial_success9to15:
	// The first 8 bytes matched; verify the last 8 bytes of the window.
	MOVQ -8(BX)(DI*1), SI
	CMPQ SI,DX
	JZ success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop9to15
	JMP fail
_16_or_more:
	CMPQ BX, $16
	JA _17_to_31
	// len(c) == 16: one unaligned 16-byte compare per position.
	MOVOU (AX), X1
	LEAQ -15(DI)(CX*1), CX
loop16:
	MOVOU (DI), X2
	PCMPEQB X1, X2
	PMOVMSKB X2, SI
	// All 16 mask bits set means every byte compared equal.
	CMPQ SI, $0xffff
	JE success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop16
	JMP fail
_17_to_31:
	// len(c) in 17..31: first 16 bytes in X1, last 16 bytes in X0.
	LEAQ 1(DI)(CX*1), CX
	SUBQ BX, CX
	MOVOU -16(AX)(BX*1), X0
	MOVOU (AX), X1
loop17to31:
	MOVOU (DI), X2
	PCMPEQB X1,X2
	PMOVMSKB X2, SI
	CMPQ SI, $0xffff
	JE partial_success17to31
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop17to31
	JMP fail
partial_success17to31:
	// The first 16 bytes matched; verify the last 16 bytes of the window.
	MOVOU -16(BX)(DI*1), X3
	PCMPEQB X0, X3
	PMOVMSKB X3, SI
	CMPQ SI, $0xffff
	JE success
	ADDQ $1,DI
	CMPQ DI,CX
	JB loop17to31
	// No candidate left: fall through to fail.
fail:
	MOVQ $-1, ret+32(FP)
	RET
success:
	// DI points at the match; convert it to an offset from the start of s.
	SUBQ s+0(FP), DI
	MOVQ DI, ret+32(FP)
	RET
|
||||||
|
|
||||||
|
|
||||||
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
|
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
|
||||||
MOVQ s+0(FP), SI
|
MOVQ s+0(FP), SI
|
||||||
MOVQ s_len+8(FP), BX
|
MOVQ s_len+8(FP), BX
|
||||||
|
|
|
||||||
|
|
@ -143,43 +143,6 @@ func ContainsRune(s string, r rune) bool {
|
||||||
return IndexRune(s, r) >= 0
|
return IndexRune(s, r) >= 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
|
||||||
func Index(s, sep string) int {
|
|
||||||
n := len(sep)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, sep[0])
|
|
||||||
case n == len(s):
|
|
||||||
if sep == s {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
// Rabin-Karp search
|
|
||||||
hashsep, pow := hashStr(sep)
|
|
||||||
var h uint32
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
h = h*primeRK + uint32(s[i])
|
|
||||||
}
|
|
||||||
if h == hashsep && s[:n] == sep {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
for i := n; i < len(s); {
|
|
||||||
h *= primeRK
|
|
||||||
h += uint32(s[i])
|
|
||||||
h -= pow * uint32(s[i-n])
|
|
||||||
i++
|
|
||||||
if h == hashsep && s[i-n:i] == sep {
|
|
||||||
return i - n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
|
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
|
||||||
func LastIndex(s, sep string) int {
|
func LastIndex(s, sep string) int {
|
||||||
n := len(sep)
|
n := len(sep)
|
||||||
|
|
|
||||||
49
src/strings/strings_amd64.go
Normal file
49
src/strings/strings_amd64.go
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package strings
|
||||||
|
|
||||||
|
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
||||||
|
// indexShortStr requires 2 <= len(c) <= shortStringLen
// The declaration has no Go body; the trailing comment names the assembly
// file that is expected to provide the implementation.
|
||||||
|
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
|
||||||
|
// shortStringLen is the largest separator length, in bytes, that Index
// passes to indexShortStr.
const shortStringLen = 31
|
||||||
|
|
||||||
|
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||||
|
func Index(s, sep string) int {
|
||||||
|
n := len(sep)
|
||||||
|
switch {
|
||||||
|
case n == 0:
|
||||||
|
return 0
|
||||||
|
case n == 1:
|
||||||
|
return IndexByte(s, sep[0])
|
||||||
|
case n <= shortStringLen:
|
||||||
|
return indexShortStr(s, sep)
|
||||||
|
case n == len(s):
|
||||||
|
if sep == s {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
case n > len(s):
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
// Rabin-Karp search
|
||||||
|
hashsep, pow := hashStr(sep)
|
||||||
|
var h uint32
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
h = h*primeRK + uint32(s[i])
|
||||||
|
}
|
||||||
|
if h == hashsep && s[:n] == sep {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
for i := n; i < len(s); {
|
||||||
|
h *= primeRK
|
||||||
|
h += uint32(s[i])
|
||||||
|
h -= pow * uint32(s[i-n])
|
||||||
|
i++
|
||||||
|
if h == hashsep && s[i-n:i] == sep {
|
||||||
|
return i - n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
47
src/strings/strings_generic.go
Normal file
47
src/strings/strings_generic.go
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !amd64
|
||||||
|
|
||||||
|
package strings
|
||||||
|
|
||||||
|
// TODO: implement the short string optimization on non-amd64 platforms
|
||||||
|
// and get rid of strings_amd64.go
|
||||||
|
|
||||||
|
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||||
|
func Index(s, sep string) int {
|
||||||
|
n := len(sep)
|
||||||
|
switch {
|
||||||
|
case n == 0:
|
||||||
|
return 0
|
||||||
|
case n == 1:
|
||||||
|
return IndexByte(s, sep[0])
|
||||||
|
case n == len(s):
|
||||||
|
if sep == s {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
case n > len(s):
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
// Rabin-Karp search
|
||||||
|
hashsep, pow := hashStr(sep)
|
||||||
|
var h uint32
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
h = h*primeRK + uint32(s[i])
|
||||||
|
}
|
||||||
|
if h == hashsep && s[:n] == sep {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
for i := n; i < len(s); {
|
||||||
|
h *= primeRK
|
||||||
|
h += uint32(s[i])
|
||||||
|
h -= pow * uint32(s[i-n])
|
||||||
|
i++
|
||||||
|
if h == hashsep && s[i-n:i] == sep {
|
||||||
|
return i - n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
@ -59,6 +59,59 @@ var indexTests = []IndexTest{
|
||||||
{"abc", "b", 1},
|
{"abc", "b", 1},
|
||||||
{"abc", "c", 2},
|
{"abc", "c", 2},
|
||||||
{"abc", "x", -1},
|
{"abc", "x", -1},
|
||||||
|
// test special cases in Index() for short strings
|
||||||
|
{"", "ab", -1},
|
||||||
|
{"bc", "ab", -1},
|
||||||
|
{"ab", "ab", 0},
|
||||||
|
{"xab", "ab", 1},
|
||||||
|
{"xab"[:2], "ab", -1},
|
||||||
|
{"", "abc", -1},
|
||||||
|
{"xbc", "abc", -1},
|
||||||
|
{"abc", "abc", 0},
|
||||||
|
{"xabc", "abc", 1},
|
||||||
|
{"xabc"[:3], "abc", -1},
|
||||||
|
{"xabxc", "abc", -1},
|
||||||
|
{"", "abcd", -1},
|
||||||
|
{"xbcd", "abcd", -1},
|
||||||
|
{"abcd", "abcd", 0},
|
||||||
|
{"xabcd", "abcd", 1},
|
||||||
|
{"xyabcd"[:5], "abcd", -1},
|
||||||
|
{"xbcqq", "abcqq", -1},
|
||||||
|
{"abcqq", "abcqq", 0},
|
||||||
|
{"xabcqq", "abcqq", 1},
|
||||||
|
{"xyabcqq"[:6], "abcqq", -1},
|
||||||
|
{"xabxcqq", "abcqq", -1},
|
||||||
|
{"xabcqxq", "abcqq", -1},
|
||||||
|
{"", "01234567", -1},
|
||||||
|
{"32145678", "01234567", -1},
|
||||||
|
{"01234567", "01234567", 0},
|
||||||
|
{"x01234567", "01234567", 1},
|
||||||
|
{"xx01234567"[:9], "01234567", -1},
|
||||||
|
{"", "0123456789", -1},
|
||||||
|
{"3214567844", "0123456789", -1},
|
||||||
|
{"0123456789", "0123456789", 0},
|
||||||
|
{"x0123456789", "0123456789", 1},
|
||||||
|
{"xyz0123456789"[:12], "0123456789", -1},
|
||||||
|
{"x01234567x89", "0123456789", -1},
|
||||||
|
{"", "0123456789012345", -1},
|
||||||
|
{"3214567889012345", "0123456789012345", -1},
|
||||||
|
{"0123456789012345", "0123456789012345", 0},
|
||||||
|
{"x0123456789012345", "0123456789012345", 1},
|
||||||
|
{"", "01234567890123456789", -1},
|
||||||
|
{"32145678890123456789", "01234567890123456789", -1},
|
||||||
|
{"01234567890123456789", "01234567890123456789", 0},
|
||||||
|
{"x01234567890123456789", "01234567890123456789", 1},
|
||||||
|
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
|
||||||
|
{"", "0123456789012345678901234567890", -1},
|
||||||
|
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
|
||||||
|
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
|
||||||
|
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
|
||||||
|
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
|
||||||
|
{"", "01234567890123456789012345678901", -1},
|
||||||
|
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
|
||||||
|
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
|
||||||
|
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
|
||||||
|
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
|
||||||
}
|
}
|
||||||
|
|
||||||
var lastIndexTests = []IndexTest{
|
var lastIndexTests = []IndexTest{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue