mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
internal/bytealg: move short string Index implementations into bytealg
Also move the arm64 CountByte implementation while we're here. Fixes #19792 Change-Id: I1e0fdf1e03e3135af84150a2703b58dad1b0d57e Reviewed-on: https://go-review.googlesource.com/98518 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
f6332bb84a
commit
ee58eccc56
27 changed files with 932 additions and 1123 deletions
|
|
@ -829,6 +829,92 @@ func EqualFold(s, t []byte) bool {
|
||||||
return len(s) == len(t)
|
return len(s) == len(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||||
|
func Index(s, sep []byte) int {
|
||||||
|
n := len(sep)
|
||||||
|
switch {
|
||||||
|
case n == 0:
|
||||||
|
return 0
|
||||||
|
case n == 1:
|
||||||
|
return IndexByte(s, sep[0])
|
||||||
|
case n == len(s):
|
||||||
|
if Equal(sep, s) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
case n > len(s):
|
||||||
|
return -1
|
||||||
|
case n <= bytealg.MaxLen:
|
||||||
|
// Use brute force when s and sep both are small
|
||||||
|
if len(s) <= bytealg.MaxBruteForce {
|
||||||
|
return bytealg.Index(s, sep)
|
||||||
|
}
|
||||||
|
c := sep[0]
|
||||||
|
i := 0
|
||||||
|
t := s[:len(s)-n+1]
|
||||||
|
fails := 0
|
||||||
|
for i < len(t) {
|
||||||
|
if t[i] != c {
|
||||||
|
// IndexByte is faster than bytealg.Index, so use it as long as
|
||||||
|
// we're not getting lots of false positives.
|
||||||
|
o := IndexByte(t[i:], c)
|
||||||
|
if o < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
i += o
|
||||||
|
}
|
||||||
|
if Equal(s[i:i+n], sep) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
fails++
|
||||||
|
i++
|
||||||
|
// Switch to bytealg.Index when IndexByte produces too many false positives.
|
||||||
|
if fails > bytealg.Cutover(i) {
|
||||||
|
r := bytealg.Index(s[i:], sep)
|
||||||
|
if r >= 0 {
|
||||||
|
return r + i
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
c := sep[0]
|
||||||
|
i := 0
|
||||||
|
fails := 0
|
||||||
|
t := s[:len(s)-n+1]
|
||||||
|
for i < len(t) {
|
||||||
|
if t[i] != c {
|
||||||
|
o := IndexByte(t[i:], c)
|
||||||
|
if o < 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
i += o
|
||||||
|
}
|
||||||
|
if Equal(s[i:i+n], sep) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
fails++
|
||||||
|
if fails >= 4+i>>4 && i < len(t) {
|
||||||
|
// Give up on IndexByte, it isn't skipping ahead
|
||||||
|
// far enough to be better than Rabin-Karp.
|
||||||
|
// Experiments (using IndexPeriodic) suggest
|
||||||
|
// the cutover is about 16 byte skips.
|
||||||
|
// TODO: if large prefixes of sep are matching
|
||||||
|
// we should cutover at even larger average skips,
|
||||||
|
// because Equal becomes that much more expensive.
|
||||||
|
// This code does not take that effect into account.
|
||||||
|
j := indexRabinKarp(s[i:], sep)
|
||||||
|
if j < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
return i + j
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
func indexRabinKarp(s, sep []byte) int {
|
func indexRabinKarp(s, sep []byte) int {
|
||||||
// Rabin-Karp search
|
// Rabin-Karp search
|
||||||
hashsep, pow := hashStr(sep)
|
hashsep, pow := hashStr(sep)
|
||||||
|
|
|
||||||
|
|
@ -1,79 +0,0 @@
|
||||||
// Copyright 2016 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package bytes
|
|
||||||
|
|
||||||
import "internal/cpu"
|
|
||||||
|
|
||||||
//go:noescape
|
|
||||||
|
|
||||||
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
|
||||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
|
||||||
func indexShortStr(s, c []byte) int // ../runtime/asm_amd64.s
|
|
||||||
func countByte(s []byte, c byte) int // ../runtime/asm_amd64.s
|
|
||||||
|
|
||||||
var shortStringLen int
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
if cpu.X86.HasAVX2 {
|
|
||||||
shortStringLen = 63
|
|
||||||
} else {
|
|
||||||
shortStringLen = 31
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
|
||||||
func Index(s, sep []byte) int {
|
|
||||||
n := len(sep)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, sep[0])
|
|
||||||
case n == len(s):
|
|
||||||
if Equal(sep, s) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
case n <= shortStringLen:
|
|
||||||
// Use brute force when s and sep both are small
|
|
||||||
if len(s) <= 64 {
|
|
||||||
return indexShortStr(s, sep)
|
|
||||||
}
|
|
||||||
c := sep[0]
|
|
||||||
i := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
fails := 0
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
// IndexByte skips 16/32 bytes per iteration,
|
|
||||||
// so it's faster than indexShortStr.
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if Equal(s[i:i+n], sep) {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
fails++
|
|
||||||
i++
|
|
||||||
// Switch to indexShortStr when IndexByte produces too many false positives.
|
|
||||||
// Too many means more that 1 error per 8 characters.
|
|
||||||
// Allow some errors in the beginning.
|
|
||||||
if fails > (i+16)/8 {
|
|
||||||
r := indexShortStr(s[i:], sep)
|
|
||||||
if r >= 0 {
|
|
||||||
return r + i
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return indexRabinKarp(s, sep)
|
|
||||||
}
|
|
||||||
|
|
@ -1,72 +0,0 @@
|
||||||
// Copyright 2017 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package bytes
|
|
||||||
|
|
||||||
func countByte(s []byte, c byte) int // bytes_arm64.s
|
|
||||||
|
|
||||||
// 8 bytes can be completely loaded into 1 register.
|
|
||||||
const shortStringLen = 8
|
|
||||||
|
|
||||||
//go:noescape
|
|
||||||
func indexShortStr(s, sep []byte) int
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
|
||||||
func Index(s, sep []byte) int {
|
|
||||||
n := len(sep)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, sep[0])
|
|
||||||
case n == len(s):
|
|
||||||
if Equal(sep, s) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
case n <= shortStringLen:
|
|
||||||
// Use brute force when both s and sep are small.
|
|
||||||
// Empirical data shows that it can get better
|
|
||||||
// performance when len(s) <= 16.
|
|
||||||
if len(s) <= 16 {
|
|
||||||
return indexShortStr(s, sep)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
c := sep[0]
|
|
||||||
i := 0
|
|
||||||
fails := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if Equal(s[i:i+n], sep) {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
fails++
|
|
||||||
if fails >= 4+i>>4 && i < len(t) {
|
|
||||||
// Give up on IndexByte, it isn't skipping ahead
|
|
||||||
// far enough to be better than Rabin-Karp.
|
|
||||||
// Experiments (using IndexPeriodic) suggest
|
|
||||||
// the cutover is about 16 byte skips.
|
|
||||||
// TODO: if large prefixes of sep are matching
|
|
||||||
// we should cutover at even larger average skips,
|
|
||||||
// because Equal becomes that much more expensive.
|
|
||||||
// This code does not take that effect into account.
|
|
||||||
j := indexRabinKarp(s[i:], sep)
|
|
||||||
if j < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return i + j
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
// Copyright 2015 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build !amd64,!s390x,!arm64
|
|
||||||
|
|
||||||
package bytes
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
|
||||||
func Index(s, sep []byte) int {
|
|
||||||
n := len(sep)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, sep[0])
|
|
||||||
case n == len(s):
|
|
||||||
if Equal(sep, s) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
c := sep[0]
|
|
||||||
i := 0
|
|
||||||
fails := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if Equal(s[i:i+n], sep) {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
fails++
|
|
||||||
if fails >= 4+i>>4 && i < len(t) {
|
|
||||||
// Give up on IndexByte, it isn't skipping ahead
|
|
||||||
// far enough to be better than Rabin-Karp.
|
|
||||||
// Experiments (using IndexPeriodic) suggest
|
|
||||||
// the cutover is about 16 byte skips.
|
|
||||||
// TODO: if large prefixes of sep are matching
|
|
||||||
// we should cutover at even larger average skips,
|
|
||||||
// because Equal becomes that much more expensive.
|
|
||||||
// This code does not take that effect into account.
|
|
||||||
j := indexRabinKarp(s[i:], sep)
|
|
||||||
if j < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return i + j
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
@ -1,80 +0,0 @@
|
||||||
// Copyright 2016 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package bytes
|
|
||||||
|
|
||||||
//go:noescape
|
|
||||||
|
|
||||||
// indexShortStr returns the index of the first instance of sep in s,
|
|
||||||
// or -1 if sep is not present in s.
|
|
||||||
// indexShortStr requires 2 <= len(sep) <= shortStringLen
|
|
||||||
func indexShortStr(s, c []byte) int // ../runtime/asm_s390x.s
|
|
||||||
|
|
||||||
// supportsVX reports whether the vector facility is available.
|
|
||||||
// indexShortStr must not be called if the vector facility is not
|
|
||||||
// available.
|
|
||||||
func supportsVX() bool // ../runtime/asm_s390x.s
|
|
||||||
|
|
||||||
var shortStringLen = -1
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
if supportsVX() {
|
|
||||||
shortStringLen = 64
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
|
||||||
func Index(s, sep []byte) int {
|
|
||||||
n := len(sep)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, sep[0])
|
|
||||||
case n == len(s):
|
|
||||||
if Equal(sep, s) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
case n <= shortStringLen:
|
|
||||||
// Use brute force when s and sep both are small
|
|
||||||
if len(s) <= 64 {
|
|
||||||
return indexShortStr(s, sep)
|
|
||||||
}
|
|
||||||
c := sep[0]
|
|
||||||
i := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
fails := 0
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
// IndexByte skips 16/32 bytes per iteration,
|
|
||||||
// so it's faster than indexShortStr.
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if Equal(s[i:i+n], sep) {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
fails++
|
|
||||||
i++
|
|
||||||
// Switch to indexShortStr when IndexByte produces too many false positives.
|
|
||||||
// Too many means more that 1 error per 8 characters.
|
|
||||||
// Allow some errors in the beginning.
|
|
||||||
if fails > (i+16)/8 {
|
|
||||||
r := indexShortStr(s[i:], sep)
|
|
||||||
if r >= 0 {
|
|
||||||
return r + i
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return indexRabinKarp(s, sep)
|
|
||||||
}
|
|
||||||
|
|
@ -1,20 +1,16 @@
|
||||||
// amd64-specific vet whitelist. See readme.txt for details.
|
// amd64-specific vet whitelist. See readme.txt for details.
|
||||||
|
|
||||||
internal/bytealg/compare_amd64.s: [amd64] cannot check cross-package assembly function: Compare is in package bytes
|
|
||||||
internal/bytealg/compare_amd64.s: [amd64] cannot check cross-package assembly function: cmpstring is in package runtime
|
|
||||||
|
|
||||||
// False positives.
|
// False positives.
|
||||||
|
|
||||||
|
// Nothing much to do about cross-package assembly. Unfortunate.
|
||||||
|
internal/bytealg/compare_amd64.s: [amd64] cannot check cross-package assembly function: Compare is in package bytes
|
||||||
|
internal/bytealg/compare_amd64.s: [amd64] cannot check cross-package assembly function: cmpstring is in package runtime
|
||||||
|
|
||||||
// reflect trampolines intentionally omit arg size. Same for morestack.
|
// reflect trampolines intentionally omit arg size. Same for morestack.
|
||||||
runtime/asm_amd64.s: [amd64] morestack: use of 8(SP) points beyond argument frame
|
runtime/asm_amd64.s: [amd64] morestack: use of 8(SP) points beyond argument frame
|
||||||
runtime/asm_amd64.s: [amd64] morestack: use of 16(SP) points beyond argument frame
|
runtime/asm_amd64.s: [amd64] morestack: use of 16(SP) points beyond argument frame
|
||||||
runtime/asm_amd64.s: [amd64] morestack: use of 8(SP) points beyond argument frame
|
runtime/asm_amd64.s: [amd64] morestack: use of 8(SP) points beyond argument frame
|
||||||
|
|
||||||
// Nothing much to do about cross-package assembly. Unfortunate.
|
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package strings
|
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes
|
|
||||||
|
|
||||||
// Intentionally missing declarations. These are special assembly routines.
|
// Intentionally missing declarations. These are special assembly routines.
|
||||||
// Some are jumped into from other routines, with values in specific registers.
|
// Some are jumped into from other routines, with values in specific registers.
|
||||||
// duff* have direct calls from the compiler.
|
// duff* have direct calls from the compiler.
|
||||||
|
|
@ -25,4 +21,3 @@ runtime/asm_amd64.s: [amd64] addmoduledata: function addmoduledata missing Go de
|
||||||
runtime/duff_amd64.s: [amd64] duffzero: function duffzero missing Go declaration
|
runtime/duff_amd64.s: [amd64] duffzero: function duffzero missing Go declaration
|
||||||
runtime/duff_amd64.s: [amd64] duffcopy: function duffcopy missing Go declaration
|
runtime/duff_amd64.s: [amd64] duffcopy: function duffcopy missing Go declaration
|
||||||
runtime/asm_amd64.s: [amd64] stackcheck: function stackcheck missing Go declaration
|
runtime/asm_amd64.s: [amd64] stackcheck: function stackcheck missing Go declaration
|
||||||
runtime/asm_amd64.s: [amd64] indexShortStr: function indexShortStr missing Go declaration
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,6 @@
|
||||||
runtime/asm_s390x.s: [s390x] abort: function abort missing Go declaration
|
runtime/asm_s390x.s: [s390x] abort: function abort missing Go declaration
|
||||||
internal/bytealg/compare_s390x.s: [s390x] cannot check cross-package assembly function: Compare is in package bytes
|
internal/bytealg/compare_s390x.s: [s390x] cannot check cross-package assembly function: Compare is in package bytes
|
||||||
internal/bytealg/compare_s390x.s: [s390x] cannot check cross-package assembly function: cmpstring is in package runtime
|
internal/bytealg/compare_s390x.s: [s390x] cannot check cross-package assembly function: cmpstring is in package runtime
|
||||||
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: supportsVX is in package strings
|
|
||||||
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: supportsVX is in package bytes
|
|
||||||
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: indexShortStr is in package strings
|
|
||||||
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: indexShortStr is in package bytes
|
|
||||||
runtime/asm_s390x.s: [s390x] indexShortStr: function indexShortStr missing Go declaration
|
|
||||||
runtime/asm_s390x.s: [s390x] addmoduledata: function addmoduledata missing Go declaration
|
runtime/asm_s390x.s: [s390x] addmoduledata: function addmoduledata missing Go declaration
|
||||||
runtime/memclr_s390x.s: [s390x] memclr_s390x_exrl_xc: function memclr_s390x_exrl_xc missing Go declaration
|
runtime/memclr_s390x.s: [s390x] memclr_s390x_exrl_xc: function memclr_s390x_exrl_xc missing Go declaration
|
||||||
runtime/memmove_s390x.s: [s390x] memmove_s390x_exrl_mvc: function memmove_s390x_exrl_mvc missing Go declaration
|
runtime/memmove_s390x.s: [s390x] memmove_s390x_exrl_mvc: function memmove_s390x_exrl_mvc missing Go declaration
|
||||||
|
|
|
||||||
22
src/internal/bytealg/bytealg.go
Normal file
22
src/internal/bytealg/bytealg.go
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package bytealg
|
||||||
|
|
||||||
|
import (
|
||||||
|
"internal/cpu"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Offsets into internal/cpu records for use in assembly.
|
||||||
|
const (
|
||||||
|
x86_HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
|
||||||
|
x86_HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
|
||||||
|
x86_HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
||||||
|
x86_HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
|
||||||
|
s390x_HasVX = unsafe.Offsetof(cpu.S390X.HasVX)
|
||||||
|
)
|
||||||
|
|
||||||
|
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
|
||||||
|
var MaxLen int
|
||||||
90
src/internal/bytealg/count_arm64.s
Normal file
90
src/internal/bytealg/count_arm64.s
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "go_asm.h"
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
TEXT ·Count(SB),NOSPLIT,$0-40
|
||||||
|
MOVD b_base+0(FP), R0
|
||||||
|
MOVD b_len+8(FP), R2
|
||||||
|
MOVBU c+24(FP), R1
|
||||||
|
MOVD $ret+32(FP), R8
|
||||||
|
B countbytebody<>(SB)
|
||||||
|
|
||||||
|
TEXT ·CountString(SB),NOSPLIT,$0-32
|
||||||
|
MOVD s_base+0(FP), R0
|
||||||
|
MOVD s_len+8(FP), R2
|
||||||
|
MOVBU c+16(FP), R1
|
||||||
|
MOVD $ret+24(FP), R8
|
||||||
|
B countbytebody<>(SB)
|
||||||
|
|
||||||
|
// input:
|
||||||
|
// R0: data
|
||||||
|
// R2: data len
|
||||||
|
// R1: byte to find
|
||||||
|
// R8: address to put result
|
||||||
|
TEXT countbytebody<>(SB),NOSPLIT,$0
|
||||||
|
// R11 = count of byte to search
|
||||||
|
MOVD $0, R11
|
||||||
|
// short path to handle 0-byte case
|
||||||
|
CBZ R2, done
|
||||||
|
CMP $0x20, R2
|
||||||
|
// jump directly to tail if length < 32
|
||||||
|
BLO tail
|
||||||
|
ANDS $0x1f, R0, R9
|
||||||
|
BEQ chunk
|
||||||
|
// Process the head that is not 32-byte aligned
|
||||||
|
BIC $0x1f, R0, R3
|
||||||
|
ADD $0x20, R3
|
||||||
|
head_loop:
|
||||||
|
MOVBU.P 1(R0), R5
|
||||||
|
CMP R5, R1
|
||||||
|
CINC EQ, R11, R11
|
||||||
|
SUB $1, R2, R2
|
||||||
|
CMP R0, R3
|
||||||
|
BNE head_loop
|
||||||
|
// Work with 32-byte aligned chunks
|
||||||
|
chunk:
|
||||||
|
BIC $0x1f, R2, R9
|
||||||
|
// The first chunk can also be the last
|
||||||
|
CBZ R9, tail
|
||||||
|
// R3 = end of 32-byte chunks
|
||||||
|
ADD R0, R9, R3
|
||||||
|
MOVD $1, R5
|
||||||
|
VMOV R5, V5.B16
|
||||||
|
// R2 = length of tail
|
||||||
|
SUB R9, R2, R2
|
||||||
|
// Duplicate R1 (byte to search) to 16 1-byte elements of V0
|
||||||
|
VMOV R1, V0.B16
|
||||||
|
// Clear the low 64-bit element of V7 and V8
|
||||||
|
VEOR V7.B8, V7.B8, V7.B8
|
||||||
|
VEOR V8.B8, V8.B8, V8.B8
|
||||||
|
// Count the target byte in 32-byte chunk
|
||||||
|
chunk_loop:
|
||||||
|
VLD1.P (R0), [V1.B16, V2.B16]
|
||||||
|
CMP R0, R3
|
||||||
|
VCMEQ V0.B16, V1.B16, V3.B16
|
||||||
|
VCMEQ V0.B16, V2.B16, V4.B16
|
||||||
|
// Clear the higher 7 bits
|
||||||
|
VAND V5.B16, V3.B16, V3.B16
|
||||||
|
VAND V5.B16, V4.B16, V4.B16
|
||||||
|
// Count the lanes that match the requested byte
|
||||||
|
VADDP V4.B16, V3.B16, V6.B16 // 32B->16B
|
||||||
|
VUADDLV V6.B16, V7
|
||||||
|
// Accumulate the count in low 64-bit element of V8 when inside the loop
|
||||||
|
VADD V7, V8
|
||||||
|
BNE chunk_loop
|
||||||
|
VMOV V8.D[0], R6
|
||||||
|
ADD R6, R11, R11
|
||||||
|
CBZ R2, done
|
||||||
|
tail:
|
||||||
|
// Work with tail shorter than 32 bytes
|
||||||
|
MOVBU.P 1(R0), R5
|
||||||
|
SUB $1, R2, R2
|
||||||
|
CMP R5, R1
|
||||||
|
CINC EQ, R11, R11
|
||||||
|
CBNZ R2, tail
|
||||||
|
done:
|
||||||
|
MOVD R11, (R8)
|
||||||
|
RET
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// +build !amd64
|
// +build !amd64,!arm64
|
||||||
|
|
||||||
package bytealg
|
package bytealg
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// +build amd64
|
// +build amd64 arm64
|
||||||
|
|
||||||
package bytealg
|
package bytealg
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,24 +4,8 @@
|
||||||
|
|
||||||
package bytealg
|
package bytealg
|
||||||
|
|
||||||
import (
|
|
||||||
"internal/cpu"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Note: there's no equal_generic.go because every platform must implement at least memequal_varlen in assembly.
|
// Note: there's no equal_generic.go because every platform must implement at least memequal_varlen in assembly.
|
||||||
|
|
||||||
// Because equal_native.go is unconditional, it's a good place to compute asm constants.
|
|
||||||
// TODO: find a better way to do this?
|
|
||||||
|
|
||||||
// Offsets into internal/cpu records for use in assembly.
|
|
||||||
const (
|
|
||||||
x86_HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
|
|
||||||
x86_HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
|
||||||
x86_HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
|
|
||||||
s390x_HasVX = unsafe.Offsetof(cpu.S390X.HasVX)
|
|
||||||
)
|
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func Equal(a, b []byte) bool
|
func Equal(a, b []byte) bool
|
||||||
|
|
||||||
|
|
|
||||||
26
src/internal/bytealg/index_amd64.go
Normal file
26
src/internal/bytealg/index_amd64.go
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package bytealg
|
||||||
|
|
||||||
|
import "internal/cpu"
|
||||||
|
|
||||||
|
const MaxBruteForce = 64
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if cpu.X86.HasAVX2 {
|
||||||
|
MaxLen = 63
|
||||||
|
} else {
|
||||||
|
MaxLen = 31
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cutover returns how many IndexByte failures the caller should accept
// before giving up on IndexByte and switching over to Index.
// n is the number of bytes processed so far.
// The bytes.Index implementation shows how the result is used.
func Cutover(n int) int {
	const (
		slop       = 16 // allowance added up front so early failures are tolerated
		perFailure = 8  // one tolerated failure per this many bytes
	)
	return (n + slop) / perFailure
}
|
||||||
274
src/internal/bytealg/index_amd64.s
Normal file
274
src/internal/bytealg/index_amd64.s
Normal file
|
|
@ -0,0 +1,274 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "go_asm.h"
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
TEXT ·Index(SB),NOSPLIT,$0-56
|
||||||
|
MOVQ a_base+0(FP), DI
|
||||||
|
MOVQ a_len+8(FP), DX
|
||||||
|
MOVQ b_base+24(FP), BP
|
||||||
|
MOVQ b_len+32(FP), AX
|
||||||
|
MOVQ DI, R10
|
||||||
|
LEAQ ret+48(FP), R11
|
||||||
|
JMP indexbody<>(SB)
|
||||||
|
|
||||||
|
TEXT ·IndexString(SB),NOSPLIT,$0-40
|
||||||
|
MOVQ a_base+0(FP), DI
|
||||||
|
MOVQ a_len+8(FP), DX
|
||||||
|
MOVQ b_base+16(FP), BP
|
||||||
|
MOVQ b_len+24(FP), AX
|
||||||
|
MOVQ DI, R10
|
||||||
|
LEAQ ret+32(FP), R11
|
||||||
|
JMP indexbody<>(SB)
|
||||||
|
|
||||||
|
// AX: length of string, that we are searching for
|
||||||
|
// DX: length of string, in which we are searching
|
||||||
|
// DI: pointer to string, in which we are searching
|
||||||
|
// BP: pointer to string, that we are searching for
|
||||||
|
// R11: address, where to put return value
|
||||||
|
// Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
||||||
|
TEXT indexbody<>(SB),NOSPLIT,$0
|
||||||
|
CMPQ AX, DX
|
||||||
|
JA fail
|
||||||
|
CMPQ DX, $16
|
||||||
|
JAE sse42
|
||||||
|
no_sse42:
|
||||||
|
CMPQ AX, $2
|
||||||
|
JA _3_or_more
|
||||||
|
MOVW (BP), BP
|
||||||
|
LEAQ -1(DI)(DX*1), DX
|
||||||
|
loop2:
|
||||||
|
MOVW (DI), SI
|
||||||
|
CMPW SI,BP
|
||||||
|
JZ success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop2
|
||||||
|
JMP fail
|
||||||
|
_3_or_more:
|
||||||
|
CMPQ AX, $3
|
||||||
|
JA _4_or_more
|
||||||
|
MOVW 1(BP), BX
|
||||||
|
MOVW (BP), BP
|
||||||
|
LEAQ -2(DI)(DX*1), DX
|
||||||
|
loop3:
|
||||||
|
MOVW (DI), SI
|
||||||
|
CMPW SI,BP
|
||||||
|
JZ partial_success3
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop3
|
||||||
|
JMP fail
|
||||||
|
partial_success3:
|
||||||
|
MOVW 1(DI), SI
|
||||||
|
CMPW SI,BX
|
||||||
|
JZ success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop3
|
||||||
|
JMP fail
|
||||||
|
_4_or_more:
|
||||||
|
CMPQ AX, $4
|
||||||
|
JA _5_or_more
|
||||||
|
MOVL (BP), BP
|
||||||
|
LEAQ -3(DI)(DX*1), DX
|
||||||
|
loop4:
|
||||||
|
MOVL (DI), SI
|
||||||
|
CMPL SI,BP
|
||||||
|
JZ success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop4
|
||||||
|
JMP fail
|
||||||
|
_5_or_more:
|
||||||
|
CMPQ AX, $7
|
||||||
|
JA _8_or_more
|
||||||
|
LEAQ 1(DI)(DX*1), DX
|
||||||
|
SUBQ AX, DX
|
||||||
|
MOVL -4(BP)(AX*1), BX
|
||||||
|
MOVL (BP), BP
|
||||||
|
loop5to7:
|
||||||
|
MOVL (DI), SI
|
||||||
|
CMPL SI,BP
|
||||||
|
JZ partial_success5to7
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop5to7
|
||||||
|
JMP fail
|
||||||
|
partial_success5to7:
|
||||||
|
MOVL -4(AX)(DI*1), SI
|
||||||
|
CMPL SI,BX
|
||||||
|
JZ success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop5to7
|
||||||
|
JMP fail
|
||||||
|
_8_or_more:
|
||||||
|
CMPQ AX, $8
|
||||||
|
JA _9_or_more
|
||||||
|
MOVQ (BP), BP
|
||||||
|
LEAQ -7(DI)(DX*1), DX
|
||||||
|
loop8:
|
||||||
|
MOVQ (DI), SI
|
||||||
|
CMPQ SI,BP
|
||||||
|
JZ success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop8
|
||||||
|
JMP fail
|
||||||
|
_9_or_more:
|
||||||
|
CMPQ AX, $15
|
||||||
|
JA _16_or_more
|
||||||
|
LEAQ 1(DI)(DX*1), DX
|
||||||
|
SUBQ AX, DX
|
||||||
|
MOVQ -8(BP)(AX*1), BX
|
||||||
|
MOVQ (BP), BP
|
||||||
|
loop9to15:
|
||||||
|
MOVQ (DI), SI
|
||||||
|
CMPQ SI,BP
|
||||||
|
JZ partial_success9to15
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop9to15
|
||||||
|
JMP fail
|
||||||
|
partial_success9to15:
|
||||||
|
MOVQ -8(AX)(DI*1), SI
|
||||||
|
CMPQ SI,BX
|
||||||
|
JZ success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop9to15
|
||||||
|
JMP fail
|
||||||
|
_16_or_more:
|
||||||
|
CMPQ AX, $16
|
||||||
|
JA _17_or_more
|
||||||
|
MOVOU (BP), X1
|
||||||
|
LEAQ -15(DI)(DX*1), DX
|
||||||
|
loop16:
|
||||||
|
MOVOU (DI), X2
|
||||||
|
PCMPEQB X1, X2
|
||||||
|
PMOVMSKB X2, SI
|
||||||
|
CMPQ SI, $0xffff
|
||||||
|
JE success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop16
|
||||||
|
JMP fail
|
||||||
|
_17_or_more:
|
||||||
|
CMPQ AX, $31
|
||||||
|
JA _32_or_more
|
||||||
|
LEAQ 1(DI)(DX*1), DX
|
||||||
|
SUBQ AX, DX
|
||||||
|
MOVOU -16(BP)(AX*1), X0
|
||||||
|
MOVOU (BP), X1
|
||||||
|
loop17to31:
|
||||||
|
MOVOU (DI), X2
|
||||||
|
PCMPEQB X1,X2
|
||||||
|
PMOVMSKB X2, SI
|
||||||
|
CMPQ SI, $0xffff
|
||||||
|
JE partial_success17to31
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop17to31
|
||||||
|
JMP fail
|
||||||
|
partial_success17to31:
|
||||||
|
MOVOU -16(AX)(DI*1), X3
|
||||||
|
PCMPEQB X0, X3
|
||||||
|
PMOVMSKB X3, SI
|
||||||
|
CMPQ SI, $0xffff
|
||||||
|
JE success
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop17to31
|
||||||
|
JMP fail
|
||||||
|
// We can get here only when AVX2 is enabled and the cutoff for Index (MaxLen) is set to 63
|
||||||
|
// So no need to check cpuid
|
||||||
|
_32_or_more:
|
||||||
|
CMPQ AX, $32
|
||||||
|
JA _33_to_63
|
||||||
|
VMOVDQU (BP), Y1
|
||||||
|
LEAQ -31(DI)(DX*1), DX
|
||||||
|
loop32:
|
||||||
|
VMOVDQU (DI), Y2
|
||||||
|
VPCMPEQB Y1, Y2, Y3
|
||||||
|
VPMOVMSKB Y3, SI
|
||||||
|
CMPL SI, $0xffffffff
|
||||||
|
JE success_avx2
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop32
|
||||||
|
JMP fail_avx2
|
||||||
|
_33_to_63:
|
||||||
|
LEAQ 1(DI)(DX*1), DX
|
||||||
|
SUBQ AX, DX
|
||||||
|
VMOVDQU -32(BP)(AX*1), Y0
|
||||||
|
VMOVDQU (BP), Y1
|
||||||
|
loop33to63:
|
||||||
|
VMOVDQU (DI), Y2
|
||||||
|
VPCMPEQB Y1, Y2, Y3
|
||||||
|
VPMOVMSKB Y3, SI
|
||||||
|
CMPL SI, $0xffffffff
|
||||||
|
JE partial_success33to63
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop33to63
|
||||||
|
JMP fail_avx2
|
||||||
|
partial_success33to63:
|
||||||
|
VMOVDQU -32(AX)(DI*1), Y3
|
||||||
|
VPCMPEQB Y0, Y3, Y4
|
||||||
|
VPMOVMSKB Y4, SI
|
||||||
|
CMPL SI, $0xffffffff
|
||||||
|
JE success_avx2
|
||||||
|
ADDQ $1,DI
|
||||||
|
CMPQ DI,DX
|
||||||
|
JB loop33to63
|
||||||
|
fail_avx2:
|
||||||
|
VZEROUPPER
|
||||||
|
fail:
|
||||||
|
MOVQ $-1, (R11)
|
||||||
|
RET
|
||||||
|
success_avx2:
|
||||||
|
VZEROUPPER
|
||||||
|
JMP success
|
||||||
|
sse42:
|
||||||
|
CMPB internal∕cpu·X86+const_x86_HasSSE42(SB), $1
|
||||||
|
JNE no_sse42
|
||||||
|
CMPQ AX, $12
|
||||||
|
// PCMPESTRI is slower than normal compare,
|
||||||
|
// so using it makes sense only if we advance 4+ bytes per compare
|
||||||
|
// This value was determined experimentally and is the ~same
|
||||||
|
// on Nehalem (first with SSE42) and Haswell.
|
||||||
|
JAE _9_or_more
|
||||||
|
LEAQ 16(BP), SI
|
||||||
|
TESTW $0xff0, SI
|
||||||
|
JEQ no_sse42
|
||||||
|
MOVOU (BP), X1
|
||||||
|
LEAQ -15(DI)(DX*1), SI
|
||||||
|
MOVQ $16, R9
|
||||||
|
SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
|
||||||
|
loop_sse42:
|
||||||
|
// 0x0c means: unsigned byte compare (bits 0,1 are 00)
|
||||||
|
// for equality (bits 2,3 are 11)
|
||||||
|
// result is not masked or inverted (bits 4,5 are 00)
|
||||||
|
// and corresponds to first matching byte (bit 6 is 0)
|
||||||
|
PCMPESTRI $0x0c, (DI), X1
|
||||||
|
// CX == 16 means no match,
|
||||||
|
// CX > R9 means partial match at the end of the string,
|
||||||
|
// otherwise sep is at offset CX from X1 start
|
||||||
|
CMPQ CX, R9
|
||||||
|
JBE sse42_success
|
||||||
|
ADDQ R9, DI
|
||||||
|
CMPQ DI, SI
|
||||||
|
JB loop_sse42
|
||||||
|
PCMPESTRI $0x0c, -1(SI), X1
|
||||||
|
CMPQ CX, R9
|
||||||
|
JA fail
|
||||||
|
LEAQ -1(SI), DI
|
||||||
|
sse42_success:
|
||||||
|
ADDQ CX, DI
|
||||||
|
success:
|
||||||
|
SUBQ R10, DI
|
||||||
|
MOVQ DI, (R11)
|
||||||
|
RET
|
||||||
23
src/internal/bytealg/index_arm64.go
Normal file
23
src/internal/bytealg/index_arm64.go
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package bytealg
|
||||||
|
|
||||||
|
// Empirical data shows that using Index can get better
|
||||||
|
// performance when len(s) <= 16.
|
||||||
|
const MaxBruteForce = 16
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// 8 bytes can be completely loaded into 1 register.
|
||||||
|
MaxLen = 8
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cutover reports the number of failures of IndexByte we should tolerate
|
||||||
|
// before switching over to Index.
|
||||||
|
// n is the number of bytes processed so far.
|
||||||
|
// See the bytes.Index implementation for details.
|
||||||
|
func Cutover(n int) int {
|
||||||
|
// 1 error per 16 characters, plus a little slop to start.
|
||||||
|
return 4 + n>>4
|
||||||
|
}
|
||||||
|
|
@ -1,88 +1,40 @@
|
||||||
// Copyright 2017 The Go Authors. All rights reserved.
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "go_asm.h"
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
// countByte(s []byte, c byte) int
|
TEXT ·Index(SB),NOSPLIT,$0-56
|
||||||
TEXT bytes·countByte(SB),NOSPLIT,$0-40
|
MOVD a_base+0(FP), R0
|
||||||
MOVD s_base+0(FP), R0
|
MOVD a_len+8(FP), R1
|
||||||
MOVD s_len+8(FP), R2
|
MOVD b_base+24(FP), R2
|
||||||
MOVBU c+24(FP), R1
|
MOVD b_len+32(FP), R3
|
||||||
// R11 = count of byte to search
|
MOVD $ret+48(FP), R9
|
||||||
MOVD $0, R11
|
B indexbody<>(SB)
|
||||||
// short path to handle 0-byte case
|
|
||||||
CBZ R2, done
|
|
||||||
CMP $0x20, R2
|
|
||||||
// jump directly to tail if length < 32
|
|
||||||
BLO tail
|
|
||||||
ANDS $0x1f, R0, R9
|
|
||||||
BEQ chunk
|
|
||||||
// Work with not 32-byte aligned head
|
|
||||||
BIC $0x1f, R0, R3
|
|
||||||
ADD $0x20, R3
|
|
||||||
head_loop:
|
|
||||||
MOVBU.P 1(R0), R5
|
|
||||||
CMP R5, R1
|
|
||||||
CINC EQ, R11, R11
|
|
||||||
SUB $1, R2, R2
|
|
||||||
CMP R0, R3
|
|
||||||
BNE head_loop
|
|
||||||
// Work with 32-byte aligned chunks
|
|
||||||
chunk:
|
|
||||||
BIC $0x1f, R2, R9
|
|
||||||
// The first chunk can also be the last
|
|
||||||
CBZ R9, tail
|
|
||||||
// R3 = end of 32-byte chunks
|
|
||||||
ADD R0, R9, R3
|
|
||||||
MOVD $1, R5
|
|
||||||
VMOV R5, V5.B16
|
|
||||||
// R2 = length of tail
|
|
||||||
SUB R9, R2, R2
|
|
||||||
// Duplicate R1 (byte to search) to 16 1-byte elements of V0
|
|
||||||
VMOV R1, V0.B16
|
|
||||||
// Clear the low 64-bit element of V7 and V8
|
|
||||||
VEOR V7.B8, V7.B8, V7.B8
|
|
||||||
VEOR V8.B8, V8.B8, V8.B8
|
|
||||||
// Count the target byte in 32-byte chunk
|
|
||||||
chunk_loop:
|
|
||||||
VLD1.P (R0), [V1.B16, V2.B16]
|
|
||||||
CMP R0, R3
|
|
||||||
VCMEQ V0.B16, V1.B16, V3.B16
|
|
||||||
VCMEQ V0.B16, V2.B16, V4.B16
|
|
||||||
// Clear the higher 7 bits
|
|
||||||
VAND V5.B16, V3.B16, V3.B16
|
|
||||||
VAND V5.B16, V4.B16, V4.B16
|
|
||||||
// Count lanes match the requested byte
|
|
||||||
VADDP V4.B16, V3.B16, V6.B16 // 32B->16B
|
|
||||||
VUADDLV V6.B16, V7
|
|
||||||
// Accumulate the count in low 64-bit element of V8 when inside the loop
|
|
||||||
VADD V7, V8
|
|
||||||
BNE chunk_loop
|
|
||||||
VMOV V8.D[0], R6
|
|
||||||
ADD R6, R11, R11
|
|
||||||
CBZ R2, done
|
|
||||||
tail:
|
|
||||||
// Work with tail shorter than 32 bytes
|
|
||||||
MOVBU.P 1(R0), R5
|
|
||||||
SUB $1, R2, R2
|
|
||||||
CMP R5, R1
|
|
||||||
CINC EQ, R11, R11
|
|
||||||
CBNZ R2, tail
|
|
||||||
done:
|
|
||||||
MOVD R11, ret+32(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// indexShortStr(s, sep []byte) int
|
TEXT ·IndexString(SB),NOSPLIT,$0-40
|
||||||
// precondition: 2 <= len(sep) <= 8
|
MOVD a_base+0(FP), R0
|
||||||
TEXT bytes·indexShortStr(SB),NOSPLIT,$0-56
|
MOVD a_len+8(FP), R1
|
||||||
|
MOVD b_base+16(FP), R2
|
||||||
|
MOVD b_len+24(FP), R3
|
||||||
|
MOVD $ret+32(FP), R9
|
||||||
|
B indexbody<>(SB)
|
||||||
|
|
||||||
|
// input:
|
||||||
|
// R0: haystack
|
||||||
|
// R1: length of haystack
|
||||||
|
// R2: needle
|
||||||
|
// R3: length of needle (2 <= len <= 8)
|
||||||
|
// R9: address to put result
|
||||||
|
TEXT indexbody<>(SB),NOSPLIT,$0-56
|
||||||
// main idea is to load 'sep' into separate register(s)
|
// main idea is to load 'sep' into separate register(s)
|
||||||
// to avoid repeatedly re-load it again and again
|
// to avoid repeatedly re-load it again and again
|
||||||
// for subsequent substring comparisons
|
// for subsequent substring comparisons
|
||||||
MOVD s+0(FP), R0
|
MOVD a_base+0(FP), R0
|
||||||
MOVD s_len+8(FP), R1
|
MOVD a_len+8(FP), R1
|
||||||
MOVD sep+24(FP), R2
|
MOVD b_base+24(FP), R2
|
||||||
MOVD sep_len+32(FP), R3
|
MOVD b_len+32(FP), R3
|
||||||
SUB R3, R1, R4
|
SUB R3, R1, R4
|
||||||
// R4 contains the start of last substring for comparison
|
// R4 contains the start of last substring for comparison
|
||||||
ADD R0, R4, R4
|
ADD R0, R4, R4
|
||||||
|
|
@ -189,9 +141,9 @@ loop_2:
|
||||||
BLS loop_2
|
BLS loop_2
|
||||||
not_found:
|
not_found:
|
||||||
MOVD $-1, R0
|
MOVD $-1, R0
|
||||||
MOVD R0, ret+48(FP)
|
MOVD R0, (R9)
|
||||||
RET
|
RET
|
||||||
found:
|
found:
|
||||||
SUB R8, R0, R0
|
SUB R8, R0, R0
|
||||||
MOVD R0, ret+48(FP)
|
MOVD R0, (R9)
|
||||||
RET
|
RET
|
||||||
29
src/internal/bytealg/index_generic.go
Normal file
29
src/internal/bytealg/index_generic.go
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !amd64,!arm64,!s390x
|
||||||
|
|
||||||
|
package bytealg
|
||||||
|
|
||||||
|
const MaxBruteForce = 0
|
||||||
|
|
||||||
|
// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
|
||||||
|
// Requires 2 <= len(b) <= MaxLen.
|
||||||
|
func Index(a, b []byte) int {
|
||||||
|
panic("unimplemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
|
||||||
|
// Requires 2 <= len(b) <= MaxLen.
|
||||||
|
func IndexString(a, b string) int {
|
||||||
|
panic("unimplemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cutover reports the number of failures of IndexByte we should tolerate
|
||||||
|
// before switching over to Index.
|
||||||
|
// n is the number of bytes processed so far.
|
||||||
|
// See the bytes.Index implementation for details.
|
||||||
|
func Cutover(n int) int {
|
||||||
|
panic("unimplemented")
|
||||||
|
}
|
||||||
19
src/internal/bytealg/index_native.go
Normal file
19
src/internal/bytealg/index_native.go
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build amd64 arm64 s390x
|
||||||
|
|
||||||
|
package bytealg
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
|
||||||
|
// Requires 2 <= len(b) <= MaxLen.
|
||||||
|
func Index(a, b []byte) int
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
|
||||||
|
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
|
||||||
|
// Requires 2 <= len(b) <= MaxLen.
|
||||||
|
func IndexString(a, b string) int
|
||||||
31
src/internal/bytealg/index_s390x.go
Normal file
31
src/internal/bytealg/index_s390x.go
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package bytealg
|
||||||
|
|
||||||
|
import "internal/cpu"
|
||||||
|
|
||||||
|
const MaxBruteForce = 64
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Note: we're kind of lucky that this flag is available at this point.
|
||||||
|
// The runtime sets HasVX when processing auxv records, and that happens
|
||||||
|
// to happen *before* running the init functions of packages that
|
||||||
|
// the runtime depends on.
|
||||||
|
// TODO: it would really be nicer for internal/cpu to figure out this
|
||||||
|
// flag by itself. Then we wouldn't need to depend on quirks of
|
||||||
|
// early startup initialization order.
|
||||||
|
if cpu.S390X.HasVX {
|
||||||
|
MaxLen = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cutover reports the number of failures of IndexByte we should tolerate
|
||||||
|
// before switching over to Index.
|
||||||
|
// n is the number of bytes processed so far.
|
||||||
|
// See the bytes.Index implementation for details.
|
||||||
|
func Cutover(n int) int {
|
||||||
|
// 1 error per 8 characters, plus a little slop to start.
|
||||||
|
return (n + 16) / 8
|
||||||
|
}
|
||||||
216
src/internal/bytealg/index_s390x.s
Normal file
216
src/internal/bytealg/index_s390x.s
Normal file
|
|
@ -0,0 +1,216 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "go_asm.h"
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// Caller must confirm availability of vx facility before calling.
|
||||||
|
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
|
||||||
|
LMG a_base+0(FP), R1, R2 // R1=&s[0], R2=len(s)
|
||||||
|
LMG b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
|
||||||
|
MOVD $ret+48(FP), R5
|
||||||
|
BR indexbody<>(SB)
|
||||||
|
|
||||||
|
// Caller must confirm availability of vx facility before calling.
|
||||||
|
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
|
||||||
|
LMG a_base+0(FP), R1, R2 // R1=&s[0], R2=len(s)
|
||||||
|
LMG b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
|
||||||
|
MOVD $ret+32(FP), R5
|
||||||
|
BR indexbody<>(SB)
|
||||||
|
|
||||||
|
// s: string we are searching
|
||||||
|
// sep: string to search for
|
||||||
|
// R1=&s[0], R2=len(s)
|
||||||
|
// R3=&sep[0], R4=len(sep)
|
||||||
|
// R5=&ret (int)
|
||||||
|
// Caller must confirm availability of vx facility before calling.
|
||||||
|
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
|
||||||
|
CMPBGT R4, R2, notfound
|
||||||
|
ADD R1, R2
|
||||||
|
SUB R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
|
||||||
|
CMPBEQ R4, $0, notfound
|
||||||
|
SUB $1, R4 // R4=len(sep)-1 for use as VLL index
|
||||||
|
VLL R4, (R3), V0 // contains first 16 bytes of sep
|
||||||
|
MOVD R1, R7
|
||||||
|
index2plus:
|
||||||
|
CMPBNE R4, $1, index3plus
|
||||||
|
MOVD $15(R7), R9
|
||||||
|
CMPBGE R9, R2, index2to16
|
||||||
|
VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00...
|
||||||
|
VONE V16
|
||||||
|
VREPH $0, V0, V1
|
||||||
|
CMPBGE R9, R2, index2to16
|
||||||
|
index2loop:
|
||||||
|
VL 0(R7), V2 // 16 bytes, even indices
|
||||||
|
VL 1(R7), V4 // 16 bytes, odd indices
|
||||||
|
VCEQH V1, V2, V5 // compare even indices
|
||||||
|
VCEQH V1, V4, V6 // compare odd indices
|
||||||
|
VSEL V5, V6, V31, V7 // merge even and odd indices
|
||||||
|
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
||||||
|
BLT foundV17
|
||||||
|
MOVD $16(R7), R7 // R7+=16
|
||||||
|
ADD $15, R7, R9
|
||||||
|
CMPBLE R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
|
||||||
|
CMPBLE R7, R2, index2to16
|
||||||
|
BR notfound
|
||||||
|
|
||||||
|
index3plus:
|
||||||
|
CMPBNE R4, $2, index4plus
|
||||||
|
ADD $15, R7, R9
|
||||||
|
CMPBGE R9, R2, index2to16
|
||||||
|
MOVD $1, R0
|
||||||
|
VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00...
|
||||||
|
VONE V16
|
||||||
|
VREPH $0, V0, V1
|
||||||
|
VREPB $2, V0, V8
|
||||||
|
index3loop:
|
||||||
|
VL (R7), V2 // load 16-bytes into V2
|
||||||
|
VLL R0, 16(R7), V3 // load 2-bytes into V3
|
||||||
|
VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1
|
||||||
|
VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2
|
||||||
|
VCEQH V1, V2, V5 // compare 2-byte even indices
|
||||||
|
VCEQH V1, V4, V6 // compare 2-byte odd indices
|
||||||
|
VCEQB V8, V9, V10 // compare last bytes
|
||||||
|
VSEL V5, V6, V31, V7 // merge even and odd indices
|
||||||
|
VN V7, V10, V7 // AND indices with last byte
|
||||||
|
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
||||||
|
BLT foundV17
|
||||||
|
MOVD $16(R7), R7 // R7+=16
|
||||||
|
ADD $15, R7, R9
|
||||||
|
CMPBLE R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
|
||||||
|
CMPBLE R7, R2, index2to16
|
||||||
|
BR notfound
|
||||||
|
|
||||||
|
index4plus:
|
||||||
|
CMPBNE R4, $3, index5plus
|
||||||
|
ADD $15, R7, R9
|
||||||
|
CMPBGE R9, R2, index2to16
|
||||||
|
MOVD $2, R0
|
||||||
|
VGBM $0x8888, V29 // 0xff000000ff000000...
|
||||||
|
VGBM $0x2222, V30 // 0x0000ff000000ff00...
|
||||||
|
VGBM $0xcccc, V31 // 0xffff0000ffff0000...
|
||||||
|
VONE V16
|
||||||
|
VREPF $0, V0, V1
|
||||||
|
index4loop:
|
||||||
|
VL (R7), V2 // load 16-bytes into V2
|
||||||
|
VLL R0, 16(R7), V3 // load 3-bytes into V3
|
||||||
|
VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1
|
||||||
|
VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2
|
||||||
|
VSLDB $3, V2, V3, V10 // V10=(V2:V3)<<3
|
||||||
|
VCEQF V1, V2, V5 // compare index 0, 4, ...
|
||||||
|
VCEQF V1, V4, V6 // compare index 1, 5, ...
|
||||||
|
VCEQF V1, V9, V11 // compare index 2, 6, ...
|
||||||
|
VCEQF V1, V10, V12 // compare index 3, 7, ...
|
||||||
|
VSEL V5, V6, V29, V13 // merge index 0, 1, 4, 5, ...
|
||||||
|
VSEL V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
|
||||||
|
VSEL V13, V14, V31, V7 // final merge
|
||||||
|
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
||||||
|
BLT foundV17
|
||||||
|
MOVD $16(R7), R7 // R7+=16
|
||||||
|
ADD $15, R7, R9
|
||||||
|
CMPBLE R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
|
||||||
|
CMPBLE R7, R2, index2to16
|
||||||
|
BR notfound
|
||||||
|
|
||||||
|
index5plus:
|
||||||
|
CMPBGT R4, $15, index17plus
|
||||||
|
index2to16:
|
||||||
|
CMPBGT R7, R2, notfound
|
||||||
|
MOVD $1(R7), R8
|
||||||
|
CMPBGT R8, R2, index2to16tail
|
||||||
|
index2to16loop:
|
||||||
|
// unrolled 2x
|
||||||
|
VLL R4, (R7), V1
|
||||||
|
VLL R4, 1(R7), V2
|
||||||
|
VCEQGS V0, V1, V3
|
||||||
|
BEQ found
|
||||||
|
MOVD $1(R7), R7
|
||||||
|
VCEQGS V0, V2, V4
|
||||||
|
BEQ found
|
||||||
|
MOVD $1(R7), R7
|
||||||
|
CMPBLT R7, R2, index2to16loop
|
||||||
|
CMPBGT R7, R2, notfound
|
||||||
|
index2to16tail:
|
||||||
|
VLL R4, (R7), V1
|
||||||
|
VCEQGS V0, V1, V2
|
||||||
|
BEQ found
|
||||||
|
BR notfound
|
||||||
|
|
||||||
|
index17plus:
|
||||||
|
CMPBGT R4, $31, index33plus
|
||||||
|
SUB $16, R4, R0
|
||||||
|
VLL R0, 16(R3), V1
|
||||||
|
VONE V7
|
||||||
|
index17to32loop:
|
||||||
|
VL (R7), V2
|
||||||
|
VLL R0, 16(R7), V3
|
||||||
|
VCEQG V0, V2, V4
|
||||||
|
VCEQG V1, V3, V5
|
||||||
|
VN V4, V5, V6
|
||||||
|
VCEQGS V6, V7, V8
|
||||||
|
BEQ found
|
||||||
|
MOVD $1(R7), R7
|
||||||
|
CMPBLE R7, R2, index17to32loop
|
||||||
|
BR notfound
|
||||||
|
|
||||||
|
index33plus:
|
||||||
|
CMPBGT R4, $47, index49plus
|
||||||
|
SUB $32, R4, R0
|
||||||
|
VL 16(R3), V1
|
||||||
|
VLL R0, 32(R3), V2
|
||||||
|
VONE V11
|
||||||
|
index33to48loop:
|
||||||
|
VL (R7), V3
|
||||||
|
VL 16(R7), V4
|
||||||
|
VLL R0, 32(R7), V5
|
||||||
|
VCEQG V0, V3, V6
|
||||||
|
VCEQG V1, V4, V7
|
||||||
|
VCEQG V2, V5, V8
|
||||||
|
VN V6, V7, V9
|
||||||
|
VN V8, V9, V10
|
||||||
|
VCEQGS V10, V11, V12
|
||||||
|
BEQ found
|
||||||
|
MOVD $1(R7), R7
|
||||||
|
CMPBLE R7, R2, index33to48loop
|
||||||
|
BR notfound
|
||||||
|
|
||||||
|
index49plus:
|
||||||
|
CMPBGT R4, $63, index65plus
|
||||||
|
SUB $48, R4, R0
|
||||||
|
VL 16(R3), V1
|
||||||
|
VL 32(R3), V2
|
||||||
|
VLL R0, 48(R3), V3
|
||||||
|
VONE V15
|
||||||
|
index49to64loop:
|
||||||
|
VL (R7), V4
|
||||||
|
VL 16(R7), V5
|
||||||
|
VL 32(R7), V6
|
||||||
|
VLL R0, 48(R7), V7
|
||||||
|
VCEQG V0, V4, V8
|
||||||
|
VCEQG V1, V5, V9
|
||||||
|
VCEQG V2, V6, V10
|
||||||
|
VCEQG V3, V7, V11
|
||||||
|
VN V8, V9, V12
|
||||||
|
VN V10, V11, V13
|
||||||
|
VN V12, V13, V14
|
||||||
|
VCEQGS V14, V15, V16
|
||||||
|
BEQ found
|
||||||
|
MOVD $1(R7), R7
|
||||||
|
CMPBLE R7, R2, index49to64loop
|
||||||
|
notfound:
|
||||||
|
MOVD $-1, (R5)
|
||||||
|
RET
|
||||||
|
|
||||||
|
index65plus:
|
||||||
|
// not implemented
|
||||||
|
MOVD $0, (R0)
|
||||||
|
RET
|
||||||
|
|
||||||
|
foundV17: // index is in doubleword V17[0]
|
||||||
|
VLGVG $0, V17, R8
|
||||||
|
ADD R8, R7
|
||||||
|
found:
|
||||||
|
SUB R1, R7
|
||||||
|
MOVD R7, (R5)
|
||||||
|
RET
|
||||||
|
|
@ -1358,274 +1358,6 @@ DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
|
||||||
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
|
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
|
||||||
GLOBL shifts<>(SB),RODATA,$256
|
GLOBL shifts<>(SB),RODATA,$256
|
||||||
|
|
||||||
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
|
|
||||||
MOVQ s+0(FP), DI
|
|
||||||
// We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
|
||||||
MOVQ s_len+8(FP), DX
|
|
||||||
MOVQ c+16(FP), BP
|
|
||||||
MOVQ c_len+24(FP), AX
|
|
||||||
MOVQ DI, R10
|
|
||||||
LEAQ ret+32(FP), R11
|
|
||||||
JMP runtime·indexShortStr(SB)
|
|
||||||
|
|
||||||
TEXT bytes·indexShortStr(SB),NOSPLIT,$0-56
|
|
||||||
MOVQ s+0(FP), DI
|
|
||||||
MOVQ s_len+8(FP), DX
|
|
||||||
MOVQ c+24(FP), BP
|
|
||||||
MOVQ c_len+32(FP), AX
|
|
||||||
MOVQ DI, R10
|
|
||||||
LEAQ ret+48(FP), R11
|
|
||||||
JMP runtime·indexShortStr(SB)
|
|
||||||
|
|
||||||
// AX: length of string, that we are searching for
|
|
||||||
// DX: length of string, in which we are searching
|
|
||||||
// DI: pointer to string, in which we are searching
|
|
||||||
// BP: pointer to string, that we are searching for
|
|
||||||
// R11: address, where to put return value
|
|
||||||
TEXT runtime·indexShortStr(SB),NOSPLIT,$0
|
|
||||||
CMPQ AX, DX
|
|
||||||
JA fail
|
|
||||||
CMPQ DX, $16
|
|
||||||
JAE sse42
|
|
||||||
no_sse42:
|
|
||||||
CMPQ AX, $2
|
|
||||||
JA _3_or_more
|
|
||||||
MOVW (BP), BP
|
|
||||||
LEAQ -1(DI)(DX*1), DX
|
|
||||||
loop2:
|
|
||||||
MOVW (DI), SI
|
|
||||||
CMPW SI,BP
|
|
||||||
JZ success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop2
|
|
||||||
JMP fail
|
|
||||||
_3_or_more:
|
|
||||||
CMPQ AX, $3
|
|
||||||
JA _4_or_more
|
|
||||||
MOVW 1(BP), BX
|
|
||||||
MOVW (BP), BP
|
|
||||||
LEAQ -2(DI)(DX*1), DX
|
|
||||||
loop3:
|
|
||||||
MOVW (DI), SI
|
|
||||||
CMPW SI,BP
|
|
||||||
JZ partial_success3
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop3
|
|
||||||
JMP fail
|
|
||||||
partial_success3:
|
|
||||||
MOVW 1(DI), SI
|
|
||||||
CMPW SI,BX
|
|
||||||
JZ success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop3
|
|
||||||
JMP fail
|
|
||||||
_4_or_more:
|
|
||||||
CMPQ AX, $4
|
|
||||||
JA _5_or_more
|
|
||||||
MOVL (BP), BP
|
|
||||||
LEAQ -3(DI)(DX*1), DX
|
|
||||||
loop4:
|
|
||||||
MOVL (DI), SI
|
|
||||||
CMPL SI,BP
|
|
||||||
JZ success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop4
|
|
||||||
JMP fail
|
|
||||||
_5_or_more:
|
|
||||||
CMPQ AX, $7
|
|
||||||
JA _8_or_more
|
|
||||||
LEAQ 1(DI)(DX*1), DX
|
|
||||||
SUBQ AX, DX
|
|
||||||
MOVL -4(BP)(AX*1), BX
|
|
||||||
MOVL (BP), BP
|
|
||||||
loop5to7:
|
|
||||||
MOVL (DI), SI
|
|
||||||
CMPL SI,BP
|
|
||||||
JZ partial_success5to7
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop5to7
|
|
||||||
JMP fail
|
|
||||||
partial_success5to7:
|
|
||||||
MOVL -4(AX)(DI*1), SI
|
|
||||||
CMPL SI,BX
|
|
||||||
JZ success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop5to7
|
|
||||||
JMP fail
|
|
||||||
_8_or_more:
|
|
||||||
CMPQ AX, $8
|
|
||||||
JA _9_or_more
|
|
||||||
MOVQ (BP), BP
|
|
||||||
LEAQ -7(DI)(DX*1), DX
|
|
||||||
loop8:
|
|
||||||
MOVQ (DI), SI
|
|
||||||
CMPQ SI,BP
|
|
||||||
JZ success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop8
|
|
||||||
JMP fail
|
|
||||||
_9_or_more:
|
|
||||||
CMPQ AX, $15
|
|
||||||
JA _16_or_more
|
|
||||||
LEAQ 1(DI)(DX*1), DX
|
|
||||||
SUBQ AX, DX
|
|
||||||
MOVQ -8(BP)(AX*1), BX
|
|
||||||
MOVQ (BP), BP
|
|
||||||
loop9to15:
|
|
||||||
MOVQ (DI), SI
|
|
||||||
CMPQ SI,BP
|
|
||||||
JZ partial_success9to15
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop9to15
|
|
||||||
JMP fail
|
|
||||||
partial_success9to15:
|
|
||||||
MOVQ -8(AX)(DI*1), SI
|
|
||||||
CMPQ SI,BX
|
|
||||||
JZ success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop9to15
|
|
||||||
JMP fail
|
|
||||||
_16_or_more:
|
|
||||||
CMPQ AX, $16
|
|
||||||
JA _17_or_more
|
|
||||||
MOVOU (BP), X1
|
|
||||||
LEAQ -15(DI)(DX*1), DX
|
|
||||||
loop16:
|
|
||||||
MOVOU (DI), X2
|
|
||||||
PCMPEQB X1, X2
|
|
||||||
PMOVMSKB X2, SI
|
|
||||||
CMPQ SI, $0xffff
|
|
||||||
JE success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop16
|
|
||||||
JMP fail
|
|
||||||
_17_or_more:
|
|
||||||
CMPQ AX, $31
|
|
||||||
JA _32_or_more
|
|
||||||
LEAQ 1(DI)(DX*1), DX
|
|
||||||
SUBQ AX, DX
|
|
||||||
MOVOU -16(BP)(AX*1), X0
|
|
||||||
MOVOU (BP), X1
|
|
||||||
loop17to31:
|
|
||||||
MOVOU (DI), X2
|
|
||||||
PCMPEQB X1,X2
|
|
||||||
PMOVMSKB X2, SI
|
|
||||||
CMPQ SI, $0xffff
|
|
||||||
JE partial_success17to31
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop17to31
|
|
||||||
JMP fail
|
|
||||||
partial_success17to31:
|
|
||||||
MOVOU -16(AX)(DI*1), X3
|
|
||||||
PCMPEQB X0, X3
|
|
||||||
PMOVMSKB X3, SI
|
|
||||||
CMPQ SI, $0xffff
|
|
||||||
JE success
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop17to31
|
|
||||||
JMP fail
|
|
||||||
// We can get here only when AVX2 is enabled and cutoff for indexShortStr is set to 63
|
|
||||||
// So no need to check cpuid
|
|
||||||
_32_or_more:
|
|
||||||
CMPQ AX, $32
|
|
||||||
JA _33_to_63
|
|
||||||
VMOVDQU (BP), Y1
|
|
||||||
LEAQ -31(DI)(DX*1), DX
|
|
||||||
loop32:
|
|
||||||
VMOVDQU (DI), Y2
|
|
||||||
VPCMPEQB Y1, Y2, Y3
|
|
||||||
VPMOVMSKB Y3, SI
|
|
||||||
CMPL SI, $0xffffffff
|
|
||||||
JE success_avx2
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop32
|
|
||||||
JMP fail_avx2
|
|
||||||
_33_to_63:
|
|
||||||
LEAQ 1(DI)(DX*1), DX
|
|
||||||
SUBQ AX, DX
|
|
||||||
VMOVDQU -32(BP)(AX*1), Y0
|
|
||||||
VMOVDQU (BP), Y1
|
|
||||||
loop33to63:
|
|
||||||
VMOVDQU (DI), Y2
|
|
||||||
VPCMPEQB Y1, Y2, Y3
|
|
||||||
VPMOVMSKB Y3, SI
|
|
||||||
CMPL SI, $0xffffffff
|
|
||||||
JE partial_success33to63
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop33to63
|
|
||||||
JMP fail_avx2
|
|
||||||
partial_success33to63:
|
|
||||||
VMOVDQU -32(AX)(DI*1), Y3
|
|
||||||
VPCMPEQB Y0, Y3, Y4
|
|
||||||
VPMOVMSKB Y4, SI
|
|
||||||
CMPL SI, $0xffffffff
|
|
||||||
JE success_avx2
|
|
||||||
ADDQ $1,DI
|
|
||||||
CMPQ DI,DX
|
|
||||||
JB loop33to63
|
|
||||||
fail_avx2:
|
|
||||||
VZEROUPPER
|
|
||||||
fail:
|
|
||||||
MOVQ $-1, (R11)
|
|
||||||
RET
|
|
||||||
success_avx2:
|
|
||||||
VZEROUPPER
|
|
||||||
JMP success
|
|
||||||
sse42:
|
|
||||||
CMPB runtime·support_sse42(SB), $1
|
|
||||||
JNE no_sse42
|
|
||||||
CMPQ AX, $12
|
|
||||||
// PCMPESTRI is slower than normal compare,
|
|
||||||
// so using it makes sense only if we advance 4+ bytes per compare
|
|
||||||
// This value was determined experimentally and is the ~same
|
|
||||||
// on Nehalem (first with SSE42) and Haswell.
|
|
||||||
JAE _9_or_more
|
|
||||||
LEAQ 16(BP), SI
|
|
||||||
TESTW $0xff0, SI
|
|
||||||
JEQ no_sse42
|
|
||||||
MOVOU (BP), X1
|
|
||||||
LEAQ -15(DI)(DX*1), SI
|
|
||||||
MOVQ $16, R9
|
|
||||||
SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
|
|
||||||
loop_sse42:
|
|
||||||
// 0x0c means: unsigned byte compare (bits 0,1 are 00)
|
|
||||||
// for equality (bits 2,3 are 11)
|
|
||||||
// result is not masked or inverted (bits 4,5 are 00)
|
|
||||||
// and corresponds to first matching byte (bit 6 is 0)
|
|
||||||
PCMPESTRI $0x0c, (DI), X1
|
|
||||||
// CX == 16 means no match,
|
|
||||||
// CX > R9 means partial match at the end of the string,
|
|
||||||
// otherwise sep is at offset CX from X1 start
|
|
||||||
CMPQ CX, R9
|
|
||||||
JBE sse42_success
|
|
||||||
ADDQ R9, DI
|
|
||||||
CMPQ DI, SI
|
|
||||||
JB loop_sse42
|
|
||||||
PCMPESTRI $0x0c, -1(SI), X1
|
|
||||||
CMPQ CX, R9
|
|
||||||
JA fail
|
|
||||||
LEAQ -1(SI), DI
|
|
||||||
sse42_success:
|
|
||||||
ADDQ CX, DI
|
|
||||||
success:
|
|
||||||
SUBQ R10, DI
|
|
||||||
MOVQ DI, (R11)
|
|
||||||
RET
|
|
||||||
|
|
||||||
TEXT runtime·return0(SB), NOSPLIT, $0
|
TEXT runtime·return0(SB), NOSPLIT, $0
|
||||||
MOVL $0, AX
|
MOVL $0, AX
|
||||||
RET
|
RET
|
||||||
|
|
|
||||||
|
|
@ -796,230 +796,6 @@ TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
|
||||||
// compile barrier.
|
// compile barrier.
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func supportsVX() bool
|
|
||||||
TEXT strings·supportsVX(SB),NOSPLIT,$0-1
|
|
||||||
MOVBZ runtime·cpu+facilities_hasVX(SB), R0
|
|
||||||
MOVB R0, ret+0(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func supportsVX() bool
|
|
||||||
TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
|
|
||||||
MOVBZ runtime·cpu+facilities_hasVX(SB), R0
|
|
||||||
MOVB R0, ret+0(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func indexShortStr(s, sep string) int
|
|
||||||
// Caller must confirm availability of vx facility before calling.
|
|
||||||
TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
|
|
||||||
LMG s+0(FP), R1, R2 // R1=&s[0], R2=len(s)
|
|
||||||
LMG sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
|
|
||||||
MOVD $ret+32(FP), R5
|
|
||||||
BR runtime·indexShortStr(SB)
|
|
||||||
|
|
||||||
// func indexShortStr(s, sep []byte) int
|
|
||||||
// Caller must confirm availability of vx facility before calling.
|
|
||||||
TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
|
|
||||||
LMG s+0(FP), R1, R2 // R1=&s[0], R2=len(s)
|
|
||||||
LMG sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
|
|
||||||
MOVD $ret+48(FP), R5
|
|
||||||
BR runtime·indexShortStr(SB)
|
|
||||||
|
|
||||||
// s: string we are searching
|
|
||||||
// sep: string to search for
|
|
||||||
// R1=&s[0], R2=len(s)
|
|
||||||
// R3=&sep[0], R4=len(sep)
|
|
||||||
// R5=&ret (int)
|
|
||||||
// Caller must confirm availability of vx facility before calling.
|
|
||||||
TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
|
|
||||||
CMPBGT R4, R2, notfound
|
|
||||||
ADD R1, R2
|
|
||||||
SUB R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
|
|
||||||
CMPBEQ R4, $0, notfound
|
|
||||||
SUB $1, R4 // R4=len(sep)-1 for use as VLL index
|
|
||||||
VLL R4, (R3), V0 // contains first 16 bytes of sep
|
|
||||||
MOVD R1, R7
|
|
||||||
index2plus:
|
|
||||||
CMPBNE R4, $1, index3plus
|
|
||||||
MOVD $15(R7), R9
|
|
||||||
CMPBGE R9, R2, index2to16
|
|
||||||
VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00...
|
|
||||||
VONE V16
|
|
||||||
VREPH $0, V0, V1
|
|
||||||
CMPBGE R9, R2, index2to16
|
|
||||||
index2loop:
|
|
||||||
VL 0(R7), V2 // 16 bytes, even indices
|
|
||||||
VL 1(R7), V4 // 16 bytes, odd indices
|
|
||||||
VCEQH V1, V2, V5 // compare even indices
|
|
||||||
VCEQH V1, V4, V6 // compare odd indices
|
|
||||||
VSEL V5, V6, V31, V7 // merge even and odd indices
|
|
||||||
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
|
||||||
BLT foundV17
|
|
||||||
MOVD $16(R7), R7 // R7+=16
|
|
||||||
ADD $15, R7, R9
|
|
||||||
CMPBLE R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
|
|
||||||
CMPBLE R7, R2, index2to16
|
|
||||||
BR notfound
|
|
||||||
|
|
||||||
index3plus:
|
|
||||||
CMPBNE R4, $2, index4plus
|
|
||||||
ADD $15, R7, R9
|
|
||||||
CMPBGE R9, R2, index2to16
|
|
||||||
MOVD $1, R0
|
|
||||||
VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00...
|
|
||||||
VONE V16
|
|
||||||
VREPH $0, V0, V1
|
|
||||||
VREPB $2, V0, V8
|
|
||||||
index3loop:
|
|
||||||
VL (R7), V2 // load 16-bytes into V2
|
|
||||||
VLL R0, 16(R7), V3 // load 2-bytes into V3
|
|
||||||
VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1
|
|
||||||
VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2
|
|
||||||
VCEQH V1, V2, V5 // compare 2-byte even indices
|
|
||||||
VCEQH V1, V4, V6 // compare 2-byte odd indices
|
|
||||||
VCEQB V8, V9, V10 // compare last bytes
|
|
||||||
VSEL V5, V6, V31, V7 // merge even and odd indices
|
|
||||||
VN V7, V10, V7 // AND indices with last byte
|
|
||||||
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
|
||||||
BLT foundV17
|
|
||||||
MOVD $16(R7), R7 // R7+=16
|
|
||||||
ADD $15, R7, R9
|
|
||||||
CMPBLE R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
|
|
||||||
CMPBLE R7, R2, index2to16
|
|
||||||
BR notfound
|
|
||||||
|
|
||||||
index4plus:
|
|
||||||
CMPBNE R4, $3, index5plus
|
|
||||||
ADD $15, R7, R9
|
|
||||||
CMPBGE R9, R2, index2to16
|
|
||||||
MOVD $2, R0
|
|
||||||
VGBM $0x8888, V29 // 0xff000000ff000000...
|
|
||||||
VGBM $0x2222, V30 // 0x0000ff000000ff00...
|
|
||||||
VGBM $0xcccc, V31 // 0xffff0000ffff0000...
|
|
||||||
VONE V16
|
|
||||||
VREPF $0, V0, V1
|
|
||||||
index4loop:
|
|
||||||
VL (R7), V2 // load 16-bytes into V2
|
|
||||||
VLL R0, 16(R7), V3 // load 3-bytes into V3
|
|
||||||
VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1
|
|
||||||
VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<1
|
|
||||||
VSLDB $3, V2, V3, V10 // V10=(V2:V3)<<1
|
|
||||||
VCEQF V1, V2, V5 // compare index 0, 4, ...
|
|
||||||
VCEQF V1, V4, V6 // compare index 1, 5, ...
|
|
||||||
VCEQF V1, V9, V11 // compare index 2, 6, ...
|
|
||||||
VCEQF V1, V10, V12 // compare index 3, 7, ...
|
|
||||||
VSEL V5, V6, V29, V13 // merge index 0, 1, 4, 5, ...
|
|
||||||
VSEL V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
|
|
||||||
VSEL V13, V14, V31, V7 // final merge
|
|
||||||
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
|
||||||
BLT foundV17
|
|
||||||
MOVD $16(R7), R7 // R7+=16
|
|
||||||
ADD $15, R7, R9
|
|
||||||
CMPBLE R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
|
|
||||||
CMPBLE R7, R2, index2to16
|
|
||||||
BR notfound
|
|
||||||
|
|
||||||
index5plus:
|
|
||||||
CMPBGT R4, $15, index17plus
|
|
||||||
index2to16:
|
|
||||||
CMPBGT R7, R2, notfound
|
|
||||||
MOVD $1(R7), R8
|
|
||||||
CMPBGT R8, R2, index2to16tail
|
|
||||||
index2to16loop:
|
|
||||||
// unrolled 2x
|
|
||||||
VLL R4, (R7), V1
|
|
||||||
VLL R4, 1(R7), V2
|
|
||||||
VCEQGS V0, V1, V3
|
|
||||||
BEQ found
|
|
||||||
MOVD $1(R7), R7
|
|
||||||
VCEQGS V0, V2, V4
|
|
||||||
BEQ found
|
|
||||||
MOVD $1(R7), R7
|
|
||||||
CMPBLT R7, R2, index2to16loop
|
|
||||||
CMPBGT R7, R2, notfound
|
|
||||||
index2to16tail:
|
|
||||||
VLL R4, (R7), V1
|
|
||||||
VCEQGS V0, V1, V2
|
|
||||||
BEQ found
|
|
||||||
BR notfound
|
|
||||||
|
|
||||||
index17plus:
|
|
||||||
CMPBGT R4, $31, index33plus
|
|
||||||
SUB $16, R4, R0
|
|
||||||
VLL R0, 16(R3), V1
|
|
||||||
VONE V7
|
|
||||||
index17to32loop:
|
|
||||||
VL (R7), V2
|
|
||||||
VLL R0, 16(R7), V3
|
|
||||||
VCEQG V0, V2, V4
|
|
||||||
VCEQG V1, V3, V5
|
|
||||||
VN V4, V5, V6
|
|
||||||
VCEQGS V6, V7, V8
|
|
||||||
BEQ found
|
|
||||||
MOVD $1(R7), R7
|
|
||||||
CMPBLE R7, R2, index17to32loop
|
|
||||||
BR notfound
|
|
||||||
|
|
||||||
index33plus:
|
|
||||||
CMPBGT R4, $47, index49plus
|
|
||||||
SUB $32, R4, R0
|
|
||||||
VL 16(R3), V1
|
|
||||||
VLL R0, 32(R3), V2
|
|
||||||
VONE V11
|
|
||||||
index33to48loop:
|
|
||||||
VL (R7), V3
|
|
||||||
VL 16(R7), V4
|
|
||||||
VLL R0, 32(R7), V5
|
|
||||||
VCEQG V0, V3, V6
|
|
||||||
VCEQG V1, V4, V7
|
|
||||||
VCEQG V2, V5, V8
|
|
||||||
VN V6, V7, V9
|
|
||||||
VN V8, V9, V10
|
|
||||||
VCEQGS V10, V11, V12
|
|
||||||
BEQ found
|
|
||||||
MOVD $1(R7), R7
|
|
||||||
CMPBLE R7, R2, index33to48loop
|
|
||||||
BR notfound
|
|
||||||
|
|
||||||
index49plus:
|
|
||||||
CMPBGT R4, $63, index65plus
|
|
||||||
SUB $48, R4, R0
|
|
||||||
VL 16(R3), V1
|
|
||||||
VL 32(R3), V2
|
|
||||||
VLL R0, 48(R3), V3
|
|
||||||
VONE V15
|
|
||||||
index49to64loop:
|
|
||||||
VL (R7), V4
|
|
||||||
VL 16(R7), V5
|
|
||||||
VL 32(R7), V6
|
|
||||||
VLL R0, 48(R7), V7
|
|
||||||
VCEQG V0, V4, V8
|
|
||||||
VCEQG V1, V5, V9
|
|
||||||
VCEQG V2, V6, V10
|
|
||||||
VCEQG V3, V7, V11
|
|
||||||
VN V8, V9, V12
|
|
||||||
VN V10, V11, V13
|
|
||||||
VN V12, V13, V14
|
|
||||||
VCEQGS V14, V15, V16
|
|
||||||
BEQ found
|
|
||||||
MOVD $1(R7), R7
|
|
||||||
CMPBLE R7, R2, index49to64loop
|
|
||||||
notfound:
|
|
||||||
MOVD $-1, (R5)
|
|
||||||
RET
|
|
||||||
|
|
||||||
index65plus:
|
|
||||||
// not implemented
|
|
||||||
MOVD $0, (R0)
|
|
||||||
RET
|
|
||||||
|
|
||||||
foundV17: // index is in doubleword V17[0]
|
|
||||||
VLGVG $0, V17, R8
|
|
||||||
ADD R8, R7
|
|
||||||
found:
|
|
||||||
SUB R1, R7
|
|
||||||
MOVD R7, (R5)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// This is called from .init_array and follows the platform, not Go, ABI.
|
// This is called from .init_array and follows the platform, not Go, ABI.
|
||||||
// We are overly conservative. We could only save the registers we use.
|
// We are overly conservative. We could only save the registers we use.
|
||||||
// However, since this function is only called once per loaded module
|
// However, since this function is only called once per loaded module
|
||||||
|
|
|
||||||
|
|
@ -4,32 +4,16 @@
|
||||||
|
|
||||||
package runtime
|
package runtime
|
||||||
|
|
||||||
import (
|
import "internal/cpu"
|
||||||
internalcpu "internal/cpu"
|
|
||||||
"runtime/internal/sys"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// bit masks taken from bits/hwcap.h
|
// bit masks taken from bits/hwcap.h
|
||||||
_HWCAP_S390_VX = 2048 // vector facility
|
_HWCAP_S390_VX = 2048 // vector facility
|
||||||
)
|
)
|
||||||
|
|
||||||
// facilities is padded to avoid false sharing.
|
|
||||||
type facilities struct {
|
|
||||||
_ [sys.CacheLineSize]byte
|
|
||||||
hasVX bool // vector facility
|
|
||||||
_ [sys.CacheLineSize]byte
|
|
||||||
}
|
|
||||||
|
|
||||||
// cpu indicates the availability of s390x facilities that can be used in
|
|
||||||
// Go assembly but are optional on models supported by Go.
|
|
||||||
// TODO: remove this once we're only using internal/cpu.
|
|
||||||
var cpu facilities
|
|
||||||
|
|
||||||
func archauxv(tag, val uintptr) {
|
func archauxv(tag, val uintptr) {
|
||||||
switch tag {
|
switch tag {
|
||||||
case _AT_HWCAP: // CPU capability bit flags
|
case _AT_HWCAP: // CPU capability bit flags
|
||||||
internalcpu.S390X.HasVX = val&_HWCAP_S390_VX != 0
|
cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0
|
||||||
cpu.hasVX = val&_HWCAP_S390_VX != 0
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -932,6 +932,85 @@ func EqualFold(s, t string) bool {
|
||||||
return s == t
|
return s == t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
|
||||||
|
func Index(s, substr string) int {
|
||||||
|
n := len(substr)
|
||||||
|
switch {
|
||||||
|
case n == 0:
|
||||||
|
return 0
|
||||||
|
case n == 1:
|
||||||
|
return IndexByte(s, substr[0])
|
||||||
|
case n == len(s):
|
||||||
|
if substr == s {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
case n > len(s):
|
||||||
|
return -1
|
||||||
|
case n <= bytealg.MaxLen:
|
||||||
|
// Use brute force when s and substr both are small
|
||||||
|
if len(s) <= bytealg.MaxBruteForce {
|
||||||
|
return bytealg.IndexString(s, substr)
|
||||||
|
}
|
||||||
|
c := substr[0]
|
||||||
|
i := 0
|
||||||
|
t := s[:len(s)-n+1]
|
||||||
|
fails := 0
|
||||||
|
for i < len(t) {
|
||||||
|
if t[i] != c {
|
||||||
|
// IndexByte is faster than bytealg.IndexString, so use it as long as
|
||||||
|
// we're not getting lots of false positives.
|
||||||
|
o := IndexByte(t[i:], c)
|
||||||
|
if o < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
i += o
|
||||||
|
}
|
||||||
|
if s[i:i+n] == substr {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
fails++
|
||||||
|
i++
|
||||||
|
// Switch to bytealg.IndexString when IndexByte produces too many false positives.
|
||||||
|
if fails > bytealg.Cutover(i) {
|
||||||
|
r := bytealg.IndexString(s[i:], substr)
|
||||||
|
if r >= 0 {
|
||||||
|
return r + i
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
c := substr[0]
|
||||||
|
i := 0
|
||||||
|
t := s[:len(s)-n+1]
|
||||||
|
fails := 0
|
||||||
|
for i < len(t) {
|
||||||
|
if t[i] != c {
|
||||||
|
o := IndexByte(t[i:], c)
|
||||||
|
if o < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
i += o
|
||||||
|
}
|
||||||
|
if s[i:i+n] == substr {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
fails++
|
||||||
|
if fails >= 4+i>>4 && i < len(t) {
|
||||||
|
// See comment in ../bytes/bytes_generic.go.
|
||||||
|
j := indexRabinKarp(s[i:], substr)
|
||||||
|
if j < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
return i + j
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
func indexRabinKarp(s, substr string) int {
|
func indexRabinKarp(s, substr string) int {
|
||||||
// Rabin-Karp search
|
// Rabin-Karp search
|
||||||
hashss, pow := hashStr(substr)
|
hashss, pow := hashStr(substr)
|
||||||
|
|
|
||||||
|
|
@ -1,79 +0,0 @@
|
||||||
// Copyright 2015 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package strings
|
|
||||||
|
|
||||||
import "internal/cpu"
|
|
||||||
|
|
||||||
//go:noescape
|
|
||||||
|
|
||||||
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
|
|
||||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
|
||||||
func indexShortStr(s, c string) int // ../runtime/asm_amd64.s
|
|
||||||
func countByte(s string, c byte) int // ../runtime/asm_amd64.s
|
|
||||||
|
|
||||||
var shortStringLen int
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
if cpu.X86.HasAVX2 {
|
|
||||||
shortStringLen = 63
|
|
||||||
} else {
|
|
||||||
shortStringLen = 31
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
|
|
||||||
func Index(s, substr string) int {
|
|
||||||
n := len(substr)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, substr[0])
|
|
||||||
case n == len(s):
|
|
||||||
if substr == s {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
case n <= shortStringLen:
|
|
||||||
// Use brute force when s and substr both are small
|
|
||||||
if len(s) <= 64 {
|
|
||||||
return indexShortStr(s, substr)
|
|
||||||
}
|
|
||||||
c := substr[0]
|
|
||||||
i := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
fails := 0
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
// IndexByte skips 16/32 bytes per iteration,
|
|
||||||
// so it's faster than indexShortStr.
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if s[i:i+n] == substr {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
fails++
|
|
||||||
i++
|
|
||||||
// Switch to indexShortStr when IndexByte produces too many false positives.
|
|
||||||
// Too many means more than 1 error per 8 characters.
|
|
||||||
// Allow some errors in the beginning.
|
|
||||||
if fails > (i+16)/8 {
|
|
||||||
r := indexShortStr(s[i:], substr)
|
|
||||||
if r >= 0 {
|
|
||||||
return r + i
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return indexRabinKarp(s, substr)
|
|
||||||
}
|
|
||||||
|
|
@ -1,55 +0,0 @@
|
||||||
// Copyright 2015 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build !amd64,!s390x
|
|
||||||
|
|
||||||
package strings
|
|
||||||
|
|
||||||
// TODO: implement short string optimization on non-amd64 platforms
|
|
||||||
// and get rid of strings_amd64.go
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
|
|
||||||
func Index(s, substr string) int {
|
|
||||||
n := len(substr)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, substr[0])
|
|
||||||
case n == len(s):
|
|
||||||
if substr == s {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
c := substr[0]
|
|
||||||
i := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
fails := 0
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if s[i:i+n] == substr {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
fails++
|
|
||||||
if fails >= 4+i>>4 && i < len(t) {
|
|
||||||
// See comment in ../bytes/bytes_generic.go.
|
|
||||||
j := indexRabinKarp(s[i:], substr)
|
|
||||||
if j < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return i + j
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
@ -1,80 +0,0 @@
|
||||||
// Copyright 2016 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package strings
|
|
||||||
|
|
||||||
//go:noescape
|
|
||||||
|
|
||||||
// indexShortStr returns the index of the first instance of sep in s,
|
|
||||||
// or -1 if sep is not present in s.
|
|
||||||
// indexShortStr requires 2 <= len(sep) <= shortStringLen
|
|
||||||
func indexShortStr(s, sep string) int // ../runtime/asm_$GOARCH.s
|
|
||||||
|
|
||||||
// supportsVX reports whether the vector facility is available.
|
|
||||||
// indexShortStr must not be called if the vector facility is not
|
|
||||||
// available.
|
|
||||||
func supportsVX() bool // ../runtime/asm_s390x.s
|
|
||||||
|
|
||||||
var shortStringLen = -1
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
if supportsVX() {
|
|
||||||
shortStringLen = 64
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
|
|
||||||
func Index(s, substr string) int {
|
|
||||||
n := len(substr)
|
|
||||||
switch {
|
|
||||||
case n == 0:
|
|
||||||
return 0
|
|
||||||
case n == 1:
|
|
||||||
return IndexByte(s, substr[0])
|
|
||||||
case n == len(s):
|
|
||||||
if substr == s {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
case n > len(s):
|
|
||||||
return -1
|
|
||||||
case n <= shortStringLen:
|
|
||||||
// Use brute force when s and substr both are small
|
|
||||||
if len(s) <= 64 {
|
|
||||||
return indexShortStr(s, substr)
|
|
||||||
}
|
|
||||||
c := substr[0]
|
|
||||||
i := 0
|
|
||||||
t := s[:len(s)-n+1]
|
|
||||||
fails := 0
|
|
||||||
for i < len(t) {
|
|
||||||
if t[i] != c {
|
|
||||||
// IndexByte skips 16/32 bytes per iteration,
|
|
||||||
// so it's faster than indexShortStr.
|
|
||||||
o := IndexByte(t[i:], c)
|
|
||||||
if o < 0 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
i += o
|
|
||||||
}
|
|
||||||
if s[i:i+n] == substr {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
fails++
|
|
||||||
i++
|
|
||||||
// Switch to indexShortStr when IndexByte produces too many false positives.
|
|
||||||
// Too many means more than 1 error per 8 characters.
|
|
||||||
// Allow some errors in the beginning.
|
|
||||||
if fails > (i+16)/8 {
|
|
||||||
r := indexShortStr(s[i:], substr)
|
|
||||||
if r >= 0 {
|
|
||||||
return r + i
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return indexRabinKarp(s, substr)
|
|
||||||
}
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue