2008-11-24 15:17:47 -08:00
|
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
2009-08-31 13:01:25 -07:00
|
|
|
|
package strings_test
|
2008-11-24 15:17:47 -08:00
|
|
|
|
|
|
|
|
|
|
import (
|
2011-05-26 11:02:07 -07:00
|
|
|
|
"bytes"
|
2016-09-28 01:54:38 -07:00
|
|
|
|
"fmt"
|
2011-11-01 22:05:34 -04:00
|
|
|
|
"io"
|
2012-09-18 15:02:08 -04:00
|
|
|
|
"math/rand"
|
2011-03-28 09:41:57 -07:00
|
|
|
|
"reflect"
|
2009-12-15 15:40:16 -08:00
|
|
|
|
. "strings"
|
|
|
|
|
|
"testing"
|
|
|
|
|
|
"unicode"
|
2011-11-08 15:41:54 -08:00
|
|
|
|
"unicode/utf8"
|
2011-03-28 09:41:57 -07:00
|
|
|
|
"unsafe"
|
2008-11-24 15:17:47 -08:00
|
|
|
|
)
|
|
|
|
|
|
|
2008-12-18 22:37:22 -08:00
|
|
|
|
// eq reports whether a and b have the same length and contain equal strings
// at every index. Two empty slices (nil or not) compare equal.
func eq(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	// Lengths match, so indexing b with a's indices is always in range.
	for i, s := range a {
		if s != b[i] {
			return false
		}
	}
	return true
}
|
|
|
|
|
|
|
2009-10-07 11:55:06 -07:00
|
|
|
|
// Fixture strings for the tests: plain ASCII letters, multi-byte runes,
// comma-separated digits and dot-separated digits.
var (
	abcd   = "abcd"
	faces  = "☺☻☹"
	commas = "1,2,3,4"
	dots   = "1....2....3....4"
)
|
2008-11-24 15:17:47 -08:00
|
|
|
|
|
2009-06-09 10:58:58 -07:00
|
|
|
|
// IndexTest is one row of the index-style test tables in this file:
// looking for sep in s is expected to produce out.
type IndexTest struct {
	s   string // string that is searched
	sep string // what to look for (a substring, or a set of characters in the *Any tables)
	out int    // expected byte offset; the tables use -1 for "not found"
}
|
|
|
|
|
|
|
2009-10-07 11:55:06 -07:00
|
|
|
|
var indexTests = []IndexTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", "", 0},
|
|
|
|
|
|
{"", "a", -1},
|
|
|
|
|
|
{"", "foo", -1},
|
|
|
|
|
|
{"fo", "foo", -1},
|
|
|
|
|
|
{"foo", "foo", 0},
|
|
|
|
|
|
{"oofofoofooo", "f", 2},
|
|
|
|
|
|
{"oofofoofooo", "foo", 4},
|
|
|
|
|
|
{"barfoobarfoo", "foo", 3},
|
|
|
|
|
|
{"foo", "", 0},
|
|
|
|
|
|
{"foo", "o", 1},
|
|
|
|
|
|
{"abcABCabc", "A", 3},
|
2009-11-18 19:23:08 -08:00
|
|
|
|
// cases with one byte strings - test special case in Index()
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", "a", -1},
|
|
|
|
|
|
{"x", "a", -1},
|
|
|
|
|
|
{"x", "x", 0},
|
|
|
|
|
|
{"abc", "a", 0},
|
|
|
|
|
|
{"abc", "b", 1},
|
|
|
|
|
|
{"abc", "c", 2},
|
|
|
|
|
|
{"abc", "x", -1},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
// test special cases in Index() for short strings
|
|
|
|
|
|
{"", "ab", -1},
|
|
|
|
|
|
{"bc", "ab", -1},
|
|
|
|
|
|
{"ab", "ab", 0},
|
|
|
|
|
|
{"xab", "ab", 1},
|
|
|
|
|
|
{"xab"[:2], "ab", -1},
|
|
|
|
|
|
{"", "abc", -1},
|
|
|
|
|
|
{"xbc", "abc", -1},
|
|
|
|
|
|
{"abc", "abc", 0},
|
|
|
|
|
|
{"xabc", "abc", 1},
|
|
|
|
|
|
{"xabc"[:3], "abc", -1},
|
|
|
|
|
|
{"xabxc", "abc", -1},
|
|
|
|
|
|
{"", "abcd", -1},
|
|
|
|
|
|
{"xbcd", "abcd", -1},
|
|
|
|
|
|
{"abcd", "abcd", 0},
|
|
|
|
|
|
{"xabcd", "abcd", 1},
|
|
|
|
|
|
{"xyabcd"[:5], "abcd", -1},
|
|
|
|
|
|
{"xbcqq", "abcqq", -1},
|
|
|
|
|
|
{"abcqq", "abcqq", 0},
|
|
|
|
|
|
{"xabcqq", "abcqq", 1},
|
|
|
|
|
|
{"xyabcqq"[:6], "abcqq", -1},
|
|
|
|
|
|
{"xabxcqq", "abcqq", -1},
|
|
|
|
|
|
{"xabcqxq", "abcqq", -1},
|
|
|
|
|
|
{"", "01234567", -1},
|
|
|
|
|
|
{"32145678", "01234567", -1},
|
|
|
|
|
|
{"01234567", "01234567", 0},
|
|
|
|
|
|
{"x01234567", "01234567", 1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"x0123456x01234567", "01234567", 9},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
{"xx01234567"[:9], "01234567", -1},
|
|
|
|
|
|
{"", "0123456789", -1},
|
|
|
|
|
|
{"3214567844", "0123456789", -1},
|
|
|
|
|
|
{"0123456789", "0123456789", 0},
|
|
|
|
|
|
{"x0123456789", "0123456789", 1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"x012345678x0123456789", "0123456789", 11},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
{"xyz0123456789"[:12], "0123456789", -1},
|
|
|
|
|
|
{"x01234567x89", "0123456789", -1},
|
|
|
|
|
|
{"", "0123456789012345", -1},
|
|
|
|
|
|
{"3214567889012345", "0123456789012345", -1},
|
|
|
|
|
|
{"0123456789012345", "0123456789012345", 0},
|
|
|
|
|
|
{"x0123456789012345", "0123456789012345", 1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"x012345678901234x0123456789012345", "0123456789012345", 17},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
{"", "01234567890123456789", -1},
|
|
|
|
|
|
{"32145678890123456789", "01234567890123456789", -1},
|
|
|
|
|
|
{"01234567890123456789", "01234567890123456789", 0},
|
|
|
|
|
|
{"x01234567890123456789", "01234567890123456789", 1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
|
|
|
|
|
|
{"", "0123456789012345678901234567890", -1},
|
|
|
|
|
|
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
|
|
|
|
|
|
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
|
|
|
|
|
|
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
|
|
|
|
|
|
{"", "01234567890123456789012345678901", -1},
|
|
|
|
|
|
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
|
|
|
|
|
|
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
|
|
|
|
|
|
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
|
2015-10-28 18:05:05 +03:00
|
|
|
|
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
|
2016-04-28 17:39:55 +03:00
|
|
|
|
{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
|
|
|
|
|
|
{"", "0123456789012345678901234567890123456789", -1},
|
|
|
|
|
|
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
|
|
|
|
|
|
{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
|
|
|
|
|
|
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
|
|
|
|
|
|
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
|
2017-11-04 10:19:53 -07:00
|
|
|
|
// test fallback to Rabin-Karp.
|
|
|
|
|
|
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
|
|
|
|
|
|
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
|
2009-06-09 10:58:58 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-10-07 11:55:06 -07:00
|
|
|
|
var lastIndexTests = []IndexTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", "", 0},
|
|
|
|
|
|
{"", "a", -1},
|
|
|
|
|
|
{"", "foo", -1},
|
|
|
|
|
|
{"fo", "foo", -1},
|
|
|
|
|
|
{"foo", "foo", 0},
|
|
|
|
|
|
{"foo", "f", 0},
|
|
|
|
|
|
{"oofofoofooo", "f", 7},
|
|
|
|
|
|
{"oofofoofooo", "foo", 7},
|
|
|
|
|
|
{"barfoobarfoo", "foo", 9},
|
|
|
|
|
|
{"foo", "", 3},
|
|
|
|
|
|
{"foo", "o", 2},
|
|
|
|
|
|
{"abcABCabc", "A", 3},
|
|
|
|
|
|
{"abcABCabc", "a", 6},
|
2009-06-09 10:58:58 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2010-03-26 13:05:04 -07:00
|
|
|
|
var indexAnyTests = []IndexTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", "", -1},
|
|
|
|
|
|
{"", "a", -1},
|
|
|
|
|
|
{"", "abc", -1},
|
|
|
|
|
|
{"a", "", -1},
|
|
|
|
|
|
{"a", "a", 0},
|
|
|
|
|
|
{"aaa", "a", 0},
|
|
|
|
|
|
{"abc", "xyz", -1},
|
|
|
|
|
|
{"abc", "xcz", 2},
|
bytes, strings: optimize for ASCII sets
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight
An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.
Uses of ContainsAny|IndexAny|LastIndexAny:
6% are 1 character (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.
Uses of Trim|TrimLeft|TrimRight:
71% are 1 character (e.g., "\n" or " ")
14% are 2 characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.
The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.
== bytes package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75%
BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69%
BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88%
BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91%
BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71%
BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56%
BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67%
BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21%
BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15%
BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79%
BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41%
BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82%
BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55%
BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24%
BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31%
BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08%
BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92%
BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99%
BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68%
BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27%
BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94%
BenchmarkTrimASCII/1:2-4 79.0 105 +32.91%
BenchmarkTrimASCII/1:4-4 79.6 109 +36.93%
BenchmarkTrimASCII/1:8-4 78.8 118 +49.75%
BenchmarkTrimASCII/1:16-4 80.2 132 +64.59%
BenchmarkTrimASCII/16:1-4 243 116 -52.26%
BenchmarkTrimASCII/16:2-4 243 171 -29.63%
BenchmarkTrimASCII/16:4-4 243 176 -27.57%
BenchmarkTrimASCII/16:8-4 241 184 -23.65%
BenchmarkTrimASCII/16:16-4 238 199 -16.39%
BenchmarkTrimASCII/256:1-4 2580 840 -67.44%
BenchmarkTrimASCII/256:2-4 2603 1175 -54.86%
BenchmarkTrimASCII/256:4-4 2572 1188 -53.81%
BenchmarkTrimASCII/256:8-4 2550 1191 -53.29%
BenchmarkTrimASCII/256:16-4 2585 1208 -53.27%
BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37%
BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86%
BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66%
BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88%
BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21%
== strings package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33%
BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57%
BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22%
BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85%
BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15%
BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54%
BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24%
BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10%
BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03%
BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48%
BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74%
BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08%
BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37%
BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44%
BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21%
BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95%
BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19%
BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25%
BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37%
BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26%
BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14%
BenchmarkTrimASCII/1:2-4 73.3 103 +40.52%
BenchmarkTrimASCII/1:4-4 71.8 106 +47.63%
BenchmarkTrimASCII/1:8-4 71.2 113 +58.71%
BenchmarkTrimASCII/1:16-4 71.6 128 +78.77%
BenchmarkTrimASCII/16:1-4 152 116 -23.68%
BenchmarkTrimASCII/16:2-4 160 168 +5.00%
BenchmarkTrimASCII/16:4-4 172 170 -1.16%
BenchmarkTrimASCII/16:8-4 200 177 -11.50%
BenchmarkTrimASCII/16:16-4 254 193 -24.02%
BenchmarkTrimASCII/256:1-4 1438 864 -39.92%
BenchmarkTrimASCII/256:2-4 1551 1195 -22.95%
BenchmarkTrimASCII/256:4-4 1770 1200 -32.20%
BenchmarkTrimASCII/256:8-4 2195 1216 -44.60%
BenchmarkTrimASCII/256:16-4 3054 1224 -59.92%
BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20%
BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77%
BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90%
BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81%
BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68%
The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.
Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
2016-10-20 03:16:22 -07:00
|
|
|
|
{"ab☺c", "x☺yz", 2},
|
|
|
|
|
|
{"a☺b☻c☹d", "cx", len("a☺b☻")},
|
|
|
|
|
|
{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"aRegExp*", ".(|)*+?^$[]", 7},
|
|
|
|
|
|
{dots + dots + dots, " ", -1},
|
bytes, strings: optimize for ASCII sets
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight
An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.
Uses of ContainsAny|IndexAny|LastIndexAny:
6% are 1 character (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.
Uses of Trim|TrimLeft|TrimRight:
71% are 1 character (e.g., "\n" or " ")
14% are 2 characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.
The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.
== bytes package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75%
BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69%
BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88%
BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91%
BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71%
BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56%
BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67%
BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21%
BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15%
BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79%
BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41%
BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82%
BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55%
BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24%
BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31%
BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08%
BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92%
BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99%
BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68%
BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27%
BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94%
BenchmarkTrimASCII/1:2-4 79.0 105 +32.91%
BenchmarkTrimASCII/1:4-4 79.6 109 +36.93%
BenchmarkTrimASCII/1:8-4 78.8 118 +49.75%
BenchmarkTrimASCII/1:16-4 80.2 132 +64.59%
BenchmarkTrimASCII/16:1-4 243 116 -52.26%
BenchmarkTrimASCII/16:2-4 243 171 -29.63%
BenchmarkTrimASCII/16:4-4 243 176 -27.57%
BenchmarkTrimASCII/16:8-4 241 184 -23.65%
BenchmarkTrimASCII/16:16-4 238 199 -16.39%
BenchmarkTrimASCII/256:1-4 2580 840 -67.44%
BenchmarkTrimASCII/256:2-4 2603 1175 -54.86%
BenchmarkTrimASCII/256:4-4 2572 1188 -53.81%
BenchmarkTrimASCII/256:8-4 2550 1191 -53.29%
BenchmarkTrimASCII/256:16-4 2585 1208 -53.27%
BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37%
BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86%
BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66%
BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88%
BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21%
== strings package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33%
BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57%
BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22%
BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85%
BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15%
BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54%
BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24%
BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10%
BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03%
BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48%
BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74%
BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08%
BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37%
BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44%
BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21%
BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95%
BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19%
BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25%
BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37%
BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26%
BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14%
BenchmarkTrimASCII/1:2-4 73.3 103 +40.52%
BenchmarkTrimASCII/1:4-4 71.8 106 +47.63%
BenchmarkTrimASCII/1:8-4 71.2 113 +58.71%
BenchmarkTrimASCII/1:16-4 71.6 128 +78.77%
BenchmarkTrimASCII/16:1-4 152 116 -23.68%
BenchmarkTrimASCII/16:2-4 160 168 +5.00%
BenchmarkTrimASCII/16:4-4 172 170 -1.16%
BenchmarkTrimASCII/16:8-4 200 177 -11.50%
BenchmarkTrimASCII/16:16-4 254 193 -24.02%
BenchmarkTrimASCII/256:1-4 1438 864 -39.92%
BenchmarkTrimASCII/256:2-4 1551 1195 -22.95%
BenchmarkTrimASCII/256:4-4 1770 1200 -32.20%
BenchmarkTrimASCII/256:8-4 2195 1216 -44.60%
BenchmarkTrimASCII/256:16-4 3054 1224 -59.92%
BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20%
BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77%
BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90%
BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81%
BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68%
The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.
Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
2016-10-20 03:16:22 -07:00
|
|
|
|
{"012abcba210", "\xffb", 4},
|
|
|
|
|
|
{"012\x80bcb\x80210", "\xffb", 3},
|
2010-03-26 13:05:04 -07:00
|
|
|
|
}
|
bytes, strings: optimize for ASCII sets
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight
An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.
Uses of ContainsAny|IndexAny|LastIndexAny:
6% are 1 character (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.
Uses of Trim|TrimLeft|TrimRight:
71% are 1 character (e.g., "\n" or " ")
14% are 2 characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.
The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.
== bytes package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75%
BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69%
BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88%
BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91%
BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71%
BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56%
BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67%
BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21%
BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15%
BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79%
BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41%
BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82%
BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55%
BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24%
BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31%
BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08%
BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92%
BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99%
BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68%
BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27%
BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94%
BenchmarkTrimASCII/1:2-4 79.0 105 +32.91%
BenchmarkTrimASCII/1:4-4 79.6 109 +36.93%
BenchmarkTrimASCII/1:8-4 78.8 118 +49.75%
BenchmarkTrimASCII/1:16-4 80.2 132 +64.59%
BenchmarkTrimASCII/16:1-4 243 116 -52.26%
BenchmarkTrimASCII/16:2-4 243 171 -29.63%
BenchmarkTrimASCII/16:4-4 243 176 -27.57%
BenchmarkTrimASCII/16:8-4 241 184 -23.65%
BenchmarkTrimASCII/16:16-4 238 199 -16.39%
BenchmarkTrimASCII/256:1-4 2580 840 -67.44%
BenchmarkTrimASCII/256:2-4 2603 1175 -54.86%
BenchmarkTrimASCII/256:4-4 2572 1188 -53.81%
BenchmarkTrimASCII/256:8-4 2550 1191 -53.29%
BenchmarkTrimASCII/256:16-4 2585 1208 -53.27%
BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37%
BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86%
BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66%
BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88%
BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21%
== strings package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33%
BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57%
BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22%
BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85%
BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15%
BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54%
BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24%
BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10%
BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03%
BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48%
BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74%
BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08%
BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37%
BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44%
BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21%
BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95%
BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19%
BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25%
BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37%
BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26%
BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14%
BenchmarkTrimASCII/1:2-4 73.3 103 +40.52%
BenchmarkTrimASCII/1:4-4 71.8 106 +47.63%
BenchmarkTrimASCII/1:8-4 71.2 113 +58.71%
BenchmarkTrimASCII/1:16-4 71.6 128 +78.77%
BenchmarkTrimASCII/16:1-4 152 116 -23.68%
BenchmarkTrimASCII/16:2-4 160 168 +5.00%
BenchmarkTrimASCII/16:4-4 172 170 -1.16%
BenchmarkTrimASCII/16:8-4 200 177 -11.50%
BenchmarkTrimASCII/16:16-4 254 193 -24.02%
BenchmarkTrimASCII/256:1-4 1438 864 -39.92%
BenchmarkTrimASCII/256:2-4 1551 1195 -22.95%
BenchmarkTrimASCII/256:4-4 1770 1200 -32.20%
BenchmarkTrimASCII/256:8-4 2195 1216 -44.60%
BenchmarkTrimASCII/256:16-4 3054 1224 -59.92%
BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20%
BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77%
BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90%
BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81%
BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68%
The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.
Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
2016-10-20 03:16:22 -07:00
|
|
|
|
|
2010-11-12 12:47:50 -08:00
|
|
|
|
var lastIndexAnyTests = []IndexTest{
|
|
|
|
|
|
{"", "", -1},
|
|
|
|
|
|
{"", "a", -1},
|
|
|
|
|
|
{"", "abc", -1},
|
|
|
|
|
|
{"a", "", -1},
|
|
|
|
|
|
{"a", "a", 0},
|
|
|
|
|
|
{"aaa", "a", 2},
|
|
|
|
|
|
{"abc", "xyz", -1},
|
|
|
|
|
|
{"abc", "ab", 1},
|
bytes, strings: optimize for ASCII sets
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight
An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.
Uses of ContainsAny|IndexAny|LastIndexAny:
6% are 1 character (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.
Uses of Trim|TrimLeft|TrimRight:
71% are 1 character (e.g., "\n" or " ")
14% are 2 characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.
The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.
== bytes package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75%
BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69%
BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88%
BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91%
BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71%
BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56%
BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67%
BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21%
BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15%
BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79%
BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41%
BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82%
BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55%
BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24%
BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31%
BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08%
BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92%
BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99%
BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68%
BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27%
BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94%
BenchmarkTrimASCII/1:2-4 79.0 105 +32.91%
BenchmarkTrimASCII/1:4-4 79.6 109 +36.93%
BenchmarkTrimASCII/1:8-4 78.8 118 +49.75%
BenchmarkTrimASCII/1:16-4 80.2 132 +64.59%
BenchmarkTrimASCII/16:1-4 243 116 -52.26%
BenchmarkTrimASCII/16:2-4 243 171 -29.63%
BenchmarkTrimASCII/16:4-4 243 176 -27.57%
BenchmarkTrimASCII/16:8-4 241 184 -23.65%
BenchmarkTrimASCII/16:16-4 238 199 -16.39%
BenchmarkTrimASCII/256:1-4 2580 840 -67.44%
BenchmarkTrimASCII/256:2-4 2603 1175 -54.86%
BenchmarkTrimASCII/256:4-4 2572 1188 -53.81%
BenchmarkTrimASCII/256:8-4 2550 1191 -53.29%
BenchmarkTrimASCII/256:16-4 2585 1208 -53.27%
BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37%
BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86%
BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66%
BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88%
BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21%
== strings package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33%
BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57%
BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22%
BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85%
BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15%
BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54%
BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24%
BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10%
BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03%
BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48%
BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74%
BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08%
BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37%
BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44%
BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21%
BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95%
BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19%
BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25%
BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37%
BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26%
BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14%
BenchmarkTrimASCII/1:2-4 73.3 103 +40.52%
BenchmarkTrimASCII/1:4-4 71.8 106 +47.63%
BenchmarkTrimASCII/1:8-4 71.2 113 +58.71%
BenchmarkTrimASCII/1:16-4 71.6 128 +78.77%
BenchmarkTrimASCII/16:1-4 152 116 -23.68%
BenchmarkTrimASCII/16:2-4 160 168 +5.00%
BenchmarkTrimASCII/16:4-4 172 170 -1.16%
BenchmarkTrimASCII/16:8-4 200 177 -11.50%
BenchmarkTrimASCII/16:16-4 254 193 -24.02%
BenchmarkTrimASCII/256:1-4 1438 864 -39.92%
BenchmarkTrimASCII/256:2-4 1551 1195 -22.95%
BenchmarkTrimASCII/256:4-4 1770 1200 -32.20%
BenchmarkTrimASCII/256:8-4 2195 1216 -44.60%
BenchmarkTrimASCII/256:16-4 3054 1224 -59.92%
BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20%
BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77%
BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90%
BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81%
BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68%
The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.
Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
2016-10-20 03:16:22 -07:00
|
|
|
|
{"ab☺c", "x☺yz", 2},
|
|
|
|
|
|
{"a☺b☻c☹d", "cx", len("a☺b☻")},
|
|
|
|
|
|
{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
|
2010-11-12 12:47:50 -08:00
|
|
|
|
{"a.RegExp*", ".(|)*+?^$[]", 8},
|
|
|
|
|
|
{dots + dots + dots, " ", -1},
|
bytes, strings: optimize for ASCII sets
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight
An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.
Uses of ContainsAny|IndexAny|LastIndexAny:
6% are 1 character (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.
Uses of Trim|TrimLeft|TrimRight:
71% are 1 character (e.g., "\n" or " ")
14% are 2 characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single-character case with a simple closure
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bitmap representing a set of ASCII characters.
The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.
== bytes package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75%
BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69%
BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88%
BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91%
BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71%
BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56%
BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67%
BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21%
BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15%
BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79%
BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41%
BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82%
BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55%
BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24%
BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31%
BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08%
BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92%
BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99%
BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68%
BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27%
BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94%
BenchmarkTrimASCII/1:2-4 79.0 105 +32.91%
BenchmarkTrimASCII/1:4-4 79.6 109 +36.93%
BenchmarkTrimASCII/1:8-4 78.8 118 +49.75%
BenchmarkTrimASCII/1:16-4 80.2 132 +64.59%
BenchmarkTrimASCII/16:1-4 243 116 -52.26%
BenchmarkTrimASCII/16:2-4 243 171 -29.63%
BenchmarkTrimASCII/16:4-4 243 176 -27.57%
BenchmarkTrimASCII/16:8-4 241 184 -23.65%
BenchmarkTrimASCII/16:16-4 238 199 -16.39%
BenchmarkTrimASCII/256:1-4 2580 840 -67.44%
BenchmarkTrimASCII/256:2-4 2603 1175 -54.86%
BenchmarkTrimASCII/256:4-4 2572 1188 -53.81%
BenchmarkTrimASCII/256:8-4 2550 1191 -53.29%
BenchmarkTrimASCII/256:16-4 2585 1208 -53.27%
BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37%
BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86%
BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66%
BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88%
BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21%
== strings package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33%
BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57%
BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22%
BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85%
BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15%
BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54%
BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24%
BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10%
BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03%
BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48%
BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74%
BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08%
BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37%
BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44%
BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21%
BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95%
BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19%
BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25%
BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37%
BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26%
BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14%
BenchmarkTrimASCII/1:2-4 73.3 103 +40.52%
BenchmarkTrimASCII/1:4-4 71.8 106 +47.63%
BenchmarkTrimASCII/1:8-4 71.2 113 +58.71%
BenchmarkTrimASCII/1:16-4 71.6 128 +78.77%
BenchmarkTrimASCII/16:1-4 152 116 -23.68%
BenchmarkTrimASCII/16:2-4 160 168 +5.00%
BenchmarkTrimASCII/16:4-4 172 170 -1.16%
BenchmarkTrimASCII/16:8-4 200 177 -11.50%
BenchmarkTrimASCII/16:16-4 254 193 -24.02%
BenchmarkTrimASCII/256:1-4 1438 864 -39.92%
BenchmarkTrimASCII/256:2-4 1551 1195 -22.95%
BenchmarkTrimASCII/256:4-4 1770 1200 -32.20%
BenchmarkTrimASCII/256:8-4 2195 1216 -44.60%
BenchmarkTrimASCII/256:16-4 3054 1224 -59.92%
BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20%
BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77%
BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90%
BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81%
BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68%
The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.
Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
2016-10-20 03:16:22 -07:00
|
|
|
|
{"012abcba210", "\xffb", 6},
|
|
|
|
|
|
{"012\x80bcb\x80210", "\xffb", 7},
|
2010-11-12 12:47:50 -08:00
|
|
|
|
}
|
2010-03-26 13:05:04 -07:00
|
|
|
|
|
2009-06-09 10:58:58 -07:00
|
|
|
|
// Execute f on each test case. funcName should be the name of f; it's used
|
|
|
|
|
|
// in failure reports.
|
|
|
|
|
|
func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
|
2009-09-15 09:41:59 -07:00
|
|
|
|
for _, test := range testCases {
|
2009-12-15 15:40:16 -08:00
|
|
|
|
actual := f(test.s, test.sep)
|
2009-06-24 20:12:50 -07:00
|
|
|
|
if actual != test.out {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
|
2009-06-09 10:58:58 -07:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2010-11-12 12:47:50 -08:00
|
|
|
|
// TestIndex runs Index over every case in indexTests.
func TestIndex(t *testing.T) {
	runIndexTests(t, Index, "Index", indexTests)
}
|
|
|
|
|
|
// TestLastIndex runs LastIndex over every case in lastIndexTests.
func TestLastIndex(t *testing.T) {
	runIndexTests(t, LastIndex, "LastIndex", lastIndexTests)
}
|
|
|
|
|
|
// TestIndexAny runs IndexAny over every case in indexAnyTests.
func TestIndexAny(t *testing.T) {
	runIndexTests(t, IndexAny, "IndexAny", indexAnyTests)
}
|
|
|
|
|
|
// TestLastIndexAny runs LastIndexAny over every case in lastIndexAnyTests.
func TestLastIndexAny(t *testing.T) {
	runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests)
}
|
2009-06-09 10:58:58 -07:00
|
|
|
|
|
2015-04-29 20:45:55 +03:00
|
|
|
|
// TestLastIndexByte checks LastIndexByte on an empty string, on a byte that
// is absent, and on matches at the start, middle and end of the input,
// including input that contains multi-byte runes.
func TestLastIndexByte(t *testing.T) {
	// The needle is stored as a byte so no case has to index into a string
	// to obtain it.
	tests := []struct {
		s    string
		c    byte
		want int
	}{
		{"", 'q', -1},
		{"abcdef", 'q', -1},
		// something in the middle
		{"abcdefabcdef", 'a', len("abcdef")},
		// last byte
		{"abcdefabcdef", 'f', len("abcdefabcde")},
		// first byte
		{"zabcdefabcdef", 'z', 0},
		// non-ascii
		{"a☺b☻c☹d", 'b', len("a☺")},
	}
	for _, tt := range tests {
		if got := LastIndexByte(tt.s, tt.c); got != tt.want {
			t.Errorf("LastIndexByte(%q,%c) = %v; want %v", tt.s, tt.c, got, tt.want)
		}
	}
}
|
|
|
|
|
|
|
2016-05-25 16:33:19 +03:00
|
|
|
|
// simpleIndex is a brute-force reference implementation of Index: it compares
// sep against every len(sep)-byte window of s from left to right and returns
// the byte offset of the first window that matches, or -1 if none does.
func simpleIndex(s, sep string) int {
	n := len(sep)
	for start := 0; start+n <= len(s); start++ {
		if s[start:start+n] == sep {
			return start
		}
	}
	return -1
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestIndexRandom(t *testing.T) {
|
|
|
|
|
|
const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
|
|
|
|
|
|
for times := 0; times < 10; times++ {
|
|
|
|
|
|
for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary
|
|
|
|
|
|
s1 := make([]byte, strLen)
|
|
|
|
|
|
for i := range s1 {
|
|
|
|
|
|
s1[i] = chars[rand.Intn(len(chars))]
|
|
|
|
|
|
}
|
|
|
|
|
|
s := string(s1)
|
|
|
|
|
|
for i := 0; i < 50; i++ {
|
|
|
|
|
|
begin := rand.Intn(len(s) + 1)
|
|
|
|
|
|
end := begin + rand.Intn(len(s)+1-begin)
|
|
|
|
|
|
sep := s[begin:end]
|
|
|
|
|
|
if i%4 == 0 {
|
|
|
|
|
|
pos := rand.Intn(len(sep) + 1)
|
|
|
|
|
|
sep = sep[:pos] + "A" + sep[pos:]
|
|
|
|
|
|
}
|
|
|
|
|
|
want := simpleIndex(s, sep)
|
|
|
|
|
|
res := Index(s, sep)
|
|
|
|
|
|
if res != want {
|
|
|
|
|
|
t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-03-08 09:41:12 -08:00
|
|
|
|
// TestIndexRune checks IndexRune on ASCII and multi-byte runes, on
// utf8.RuneError against invalid UTF-8, and on rune values that are not valid
// code points. It then verifies that IndexRune does not allocate.
func TestIndexRune(t *testing.T) {
	tests := []struct {
		in   string
		rune rune
		want int
	}{
		{"", 'a', -1},
		{"", '☺', -1},
		{"foo", '☹', -1},
		{"foo", 'o', 1},
		{"foo☺bar", '☺', 3},
		{"foo☺☻☹bar", '☹', 9},
		{"a A x", 'A', 2},
		{"some_text=some_value", '=', 9},
		{"☺a", 'a', 3},
		{"a☻☺b", '☺', 4},

		// RuneError should match any invalid UTF-8 byte sequence.
		{"\uFFFD", utf8.RuneError, 0},
		{"\xff", utf8.RuneError, 0},
		{"☻x\uFFFD", utf8.RuneError, len("☻x")},
		{"☻x\xe2\x98", utf8.RuneError, len("☻x")},
		{"☻x\xe2\x98\uFFFD", utf8.RuneError, len("☻x")},
		{"☻x\xe2\x98x", utf8.RuneError, len("☻x")},

		// Invalid rune values should never match.
		{"a☺b☻c☹d\xe2\x98\uFFFD\xff\uFFFD\xed\xa0\x80", -1, -1},
		{"a☺b☻c☹d\xe2\x98\uFFFD\xff\uFFFD\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
		{"a☺b☻c☹d\xe2\x98\uFFFD\xff\uFFFD\xed\xa0\x80", utf8.MaxRune + 1, -1},
	}
	for _, tt := range tests {
		if got := IndexRune(tt.in, tt.rune); got != tt.want {
			t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
		}
	}

	haystack := "test世界"
	allocs := testing.AllocsPerRun(1000, func() {
		if i := IndexRune(haystack, 's'); i != 2 {
			t.Fatalf("'s' at %d; want 2", i)
		}
		if i := IndexRune(haystack, '世'); i != 4 {
			t.Fatalf("'世' at %d; want 4", i)
		}
	})
	// The allocation check is skipped whenever a coverage mode is active.
	if allocs != 0 && testing.CoverMode() == "" {
		t.Errorf("expected no allocations, got %f", allocs)
	}
}
|
|
|
|
|
|
|
2011-03-08 09:58:18 -08:00
|
|
|
|
// benchmarkString is the haystack searched by the Index benchmarks; the '☺'
// they look for starts at byte 14 and the 'v' at byte 17.
const benchmarkString = "some_text=some☺value"
|
|
|
|
|
|
|
2011-03-08 09:41:12 -08:00
|
|
|
|
func BenchmarkIndexRune(b *testing.B) {
|
2011-03-08 09:58:18 -08:00
|
|
|
|
if got := IndexRune(benchmarkString, '☺'); got != 14 {
|
2011-12-20 10:36:25 -08:00
|
|
|
|
b.Fatalf("wrong index: expected 14, got=%d", got)
|
2011-03-08 09:58:18 -08:00
|
|
|
|
}
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
IndexRune(benchmarkString, '☺')
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2016-09-06 20:23:40 +09:00
|
|
|
|
// benchmarkLongString is benchmarkString preceded by 100 copies of " ", which
// moves every match 100 bytes further into the haystack.
var benchmarkLongString = Repeat(" ", 100) + benchmarkString
|
|
|
|
|
|
|
|
|
|
|
|
func BenchmarkIndexRuneLongString(b *testing.B) {
|
|
|
|
|
|
if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
|
|
|
|
|
|
b.Fatalf("wrong index: expected 114, got=%d", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
IndexRune(benchmarkLongString, '☺')
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-03-08 09:58:18 -08:00
|
|
|
|
func BenchmarkIndexRuneFastPath(b *testing.B) {
|
|
|
|
|
|
if got := IndexRune(benchmarkString, 'v'); got != 17 {
|
2011-12-20 10:36:25 -08:00
|
|
|
|
b.Fatalf("wrong index: expected 17, got=%d", got)
|
2011-03-08 09:41:12 -08:00
|
|
|
|
}
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
2011-03-08 09:58:18 -08:00
|
|
|
|
IndexRune(benchmarkString, 'v')
|
2011-03-08 09:41:12 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-03-08 09:58:18 -08:00
|
|
|
|
func BenchmarkIndex(b *testing.B) {
|
|
|
|
|
|
if got := Index(benchmarkString, "v"); got != 17 {
|
2011-12-20 10:36:25 -08:00
|
|
|
|
b.Fatalf("wrong index: expected 17, got=%d", got)
|
2011-03-08 09:41:12 -08:00
|
|
|
|
}
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
2011-03-08 09:58:18 -08:00
|
|
|
|
Index(benchmarkString, "v")
|
2011-03-08 09:41:12 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2014-09-01 17:47:57 +10:00
|
|
|
|
func BenchmarkLastIndex(b *testing.B) {
|
|
|
|
|
|
if got := Index(benchmarkString, "v"); got != 17 {
|
|
|
|
|
|
b.Fatalf("wrong index: expected 17, got=%d", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
LastIndex(benchmarkString, "v")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2013-08-06 14:41:07 -07:00
|
|
|
|
func BenchmarkIndexByte(b *testing.B) {
|
|
|
|
|
|
if got := IndexByte(benchmarkString, 'v'); got != 17 {
|
|
|
|
|
|
b.Fatalf("wrong index: expected 17, got=%d", got)
|
|
|
|
|
|
}
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
IndexByte(benchmarkString, 'v')
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2009-01-20 14:40:40 -08:00
|
|
|
|
// SplitTest is one case for the Split-family tests: splitting s around sep
// with a limit of n is expected to produce a.
type SplitTest struct {
	s   string   // input string
	sep string   // separator
	n   int      // limit passed to SplitN / SplitAfterN
	a   []string // expected result
}
|
2009-10-07 11:55:06 -07:00
|
|
|
|
|
|
|
|
|
|
var splittests = []SplitTest{
|
2016-03-26 00:04:48 +01:00
|
|
|
|
{"", "", -1, []string{}},
|
|
|
|
|
|
{abcd, "", 2, []string{"a", "bcd"}},
|
|
|
|
|
|
{abcd, "", 4, []string{"a", "b", "c", "d"}},
|
|
|
|
|
|
{abcd, "", -1, []string{"a", "b", "c", "d"}},
|
|
|
|
|
|
{faces, "", -1, []string{"☺", "☻", "☹"}},
|
|
|
|
|
|
{faces, "", 3, []string{"☺", "☻", "☹"}},
|
|
|
|
|
|
{faces, "", 17, []string{"☺", "☻", "☹"}},
|
|
|
|
|
|
{"☺<>☹", "", -1, []string{"☺", "<22>", "☹"}},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{abcd, "a", 0, nil},
|
|
|
|
|
|
{abcd, "a", -1, []string{"", "bcd"}},
|
|
|
|
|
|
{abcd, "z", -1, []string{"abcd"}},
|
|
|
|
|
|
{commas, ",", -1, []string{"1", "2", "3", "4"}},
|
|
|
|
|
|
{dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
|
|
|
|
|
|
{faces, "☹", -1, []string{"☺☻", ""}},
|
|
|
|
|
|
{faces, "~", -1, []string{faces}},
|
|
|
|
|
|
{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
|
|
|
|
|
|
{"1 2", " ", 3, []string{"1", "2"}},
|
2009-03-03 08:39:12 -08:00
|
|
|
|
}
|
2009-10-07 11:55:06 -07:00
|
|
|
|
|
2009-01-20 14:40:40 -08:00
|
|
|
|
func TestSplit(t *testing.T) {
|
2009-06-24 19:02:29 -07:00
|
|
|
|
for _, tt := range splittests {
|
2011-06-28 09:43:14 +10:00
|
|
|
|
a := SplitN(tt.s, tt.sep, tt.n)
|
2008-11-24 15:17:47 -08:00
|
|
|
|
if !eq(a, tt.a) {
|
2009-12-15 15:40:16 -08:00
|
|
|
|
t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
|
|
|
|
|
|
continue
|
2008-11-24 15:17:47 -08:00
|
|
|
|
}
|
2010-07-01 14:08:14 -07:00
|
|
|
|
if tt.n == 0 {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
2009-12-15 15:40:16 -08:00
|
|
|
|
s := Join(a, tt.sep)
|
2008-11-24 15:17:47 -08:00
|
|
|
|
if s != tt.s {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
|
2008-11-24 15:17:47 -08:00
|
|
|
|
}
|
2011-06-28 09:43:14 +10:00
|
|
|
|
if tt.n < 0 {
|
|
|
|
|
|
b := Split(tt.s, tt.sep)
|
|
|
|
|
|
if !reflect.DeepEqual(a, b) {
|
|
|
|
|
|
t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2008-11-24 15:17:47 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2009-11-04 15:19:30 -08:00
|
|
|
|
var splitaftertests = []SplitTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{abcd, "a", -1, []string{"a", "bcd"}},
|
|
|
|
|
|
{abcd, "z", -1, []string{"abcd"}},
|
|
|
|
|
|
{abcd, "", -1, []string{"a", "b", "c", "d"}},
|
|
|
|
|
|
{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
|
|
|
|
|
|
{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
|
|
|
|
|
|
{faces, "☹", -1, []string{"☺☻☹", ""}},
|
|
|
|
|
|
{faces, "~", -1, []string{faces}},
|
|
|
|
|
|
{faces, "", -1, []string{"☺", "☻", "☹"}},
|
|
|
|
|
|
{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
|
|
|
|
|
|
{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
|
|
|
|
|
|
{"1 2", " ", 3, []string{"1 ", "2"}},
|
|
|
|
|
|
{"123", "", 2, []string{"1", "23"}},
|
|
|
|
|
|
{"123", "", 17, []string{"1", "2", "3"}},
|
2009-11-04 15:19:30 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestSplitAfter(t *testing.T) {
|
|
|
|
|
|
for _, tt := range splitaftertests {
|
2011-06-28 09:43:14 +10:00
|
|
|
|
a := SplitAfterN(tt.s, tt.sep, tt.n)
|
2009-11-04 15:19:30 -08:00
|
|
|
|
if !eq(a, tt.a) {
|
2009-12-15 15:40:16 -08:00
|
|
|
|
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
|
|
|
|
|
|
continue
|
2009-11-04 15:19:30 -08:00
|
|
|
|
}
|
2009-12-15 15:40:16 -08:00
|
|
|
|
s := Join(a, "")
|
2009-11-04 15:19:30 -08:00
|
|
|
|
if s != tt.s {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
|
2009-11-04 15:19:30 -08:00
|
|
|
|
}
|
2011-06-28 09:43:14 +10:00
|
|
|
|
if tt.n < 0 {
|
|
|
|
|
|
b := SplitAfter(tt.s, tt.sep)
|
|
|
|
|
|
if !reflect.DeepEqual(a, b) {
|
|
|
|
|
|
t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2009-11-04 15:19:30 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2009-12-15 21:09:55 -08:00
|
|
|
|
// FieldsTest is one case for the Fields and FieldsFunc tests: splitting s is
// expected to produce the fields in a.
type FieldsTest struct {
	s string   // input string
	a []string // expected fields
}
|
|
|
|
|
|
|
|
|
|
|
|
var fieldstests = []FieldsTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", []string{}},
|
|
|
|
|
|
{" ", []string{}},
|
|
|
|
|
|
{" \t ", []string{}},
|
2017-03-06 09:34:39 +01:00
|
|
|
|
{"\u2000", []string{}},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{" abc ", []string{"abc"}},
|
|
|
|
|
|
{"1 2 3 4", []string{"1", "2", "3", "4"}},
|
|
|
|
|
|
{"1 2 3 4", []string{"1", "2", "3", "4"}},
|
|
|
|
|
|
{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
|
|
|
|
|
|
{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
|
|
|
|
|
|
{"\u2000\u2001\u2002", []string{}},
|
|
|
|
|
|
{"\n™\t™\n", []string{"™", "™"}},
|
2017-03-06 09:34:39 +01:00
|
|
|
|
{"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
|
|
|
|
|
|
{"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
|
|
|
|
|
|
{"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{faces, []string{faces}},
|
2009-12-15 21:09:55 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestFields(t *testing.T) {
|
|
|
|
|
|
for _, tt := range fieldstests {
|
|
|
|
|
|
a := Fields(tt.s)
|
|
|
|
|
|
if !eq(a, tt.a) {
|
|
|
|
|
|
t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
var FieldsFuncTests = []FieldsTest{
|
|
|
|
|
|
{"", []string{}},
|
|
|
|
|
|
{"XX", []string{}},
|
|
|
|
|
|
{"XXhiXXX", []string{"hi"}},
|
|
|
|
|
|
{"aXXbXXXcX", []string{"a", "b", "c"}},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2010-04-19 16:36:50 -07:00
|
|
|
|
func TestFieldsFunc(t *testing.T) {
|
2012-09-18 15:02:08 -04:00
|
|
|
|
for _, tt := range fieldstests {
|
|
|
|
|
|
a := FieldsFunc(tt.s, unicode.IsSpace)
|
|
|
|
|
|
if !eq(a, tt.a) {
|
|
|
|
|
|
t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2011-10-25 22:22:09 -07:00
|
|
|
|
pred := func(c rune) bool { return c == 'X' }
|
2011-09-26 18:32:51 -04:00
|
|
|
|
for _, tt := range FieldsFuncTests {
|
2010-04-19 16:36:50 -07:00
|
|
|
|
a := FieldsFunc(tt.s, pred)
|
|
|
|
|
|
if !eq(a, tt.a) {
|
|
|
|
|
|
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2009-06-05 13:09:03 -07:00
|
|
|
|
// StringTest is a test case for any function which accepts and returns a
// single string: applying the function to in is expected to yield out.
type StringTest struct {
	in, out string
}
|
|
|
|
|
|
|
|
|
|
|
|
// Execute f on each test case. funcName should be the name of f; it's used
|
|
|
|
|
|
// in failure reports.
|
|
|
|
|
|
func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
|
2009-09-15 09:41:59 -07:00
|
|
|
|
for _, tc := range testCases {
|
2009-12-15 15:40:16 -08:00
|
|
|
|
actual := f(tc.in)
|
2009-06-24 20:12:50 -07:00
|
|
|
|
if actual != tc.out {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
|
2009-06-05 13:09:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2009-10-07 11:55:06 -07:00
|
|
|
|
var upperTests = []StringTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", ""},
|
2017-09-28 15:07:48 +05:30
|
|
|
|
{"ONLYUPPER", "ONLYUPPER"},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"abc", "ABC"},
|
|
|
|
|
|
{"AbC123", "ABC123"},
|
|
|
|
|
|
{"azAZ09_", "AZAZ09_"},
|
2017-09-28 15:07:48 +05:30
|
|
|
|
{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
|
|
|
|
|
|
{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
|
2018-05-04 06:54:18 +02:00
|
|
|
|
{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
|
2009-06-05 13:09:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-10-07 11:55:06 -07:00
|
|
|
|
var lowerTests = []StringTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", ""},
|
|
|
|
|
|
{"abc", "abc"},
|
|
|
|
|
|
{"AbC123", "abc123"},
|
|
|
|
|
|
{"azAZ09_", "azaz09_"},
|
2017-11-08 09:15:53 +05:30
|
|
|
|
{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
|
|
|
|
|
|
{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
|
2018-05-04 06:54:18 +02:00
|
|
|
|
{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
|
2009-06-05 13:09:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
|
// space is a run of ASCII control whitespace followed by U+0085, U+00A0,
// U+2000 and U+3000; trimSpaceTests expects TrimSpace to strip it from both
// ends of a string.
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
|
|
|
|
|
|
|
2009-10-07 11:55:06 -07:00
|
|
|
|
var trimSpaceTests = []StringTest{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", ""},
|
|
|
|
|
|
{"abc", "abc"},
|
|
|
|
|
|
{space + "abc" + space, "abc"},
|
|
|
|
|
|
{" ", ""},
|
|
|
|
|
|
{" \t\r\n \t\t\r\r\n\n ", ""},
|
|
|
|
|
|
{" \t\r\n x\t\t\r\r\n\n ", "x"},
|
|
|
|
|
|
{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
|
|
|
|
|
|
{"1 \t\r\n2", "1 \t\r\n2"},
|
|
|
|
|
|
{" x\x80", "x\x80"},
|
|
|
|
|
|
{" x\xc0", "x\xc0"},
|
|
|
|
|
|
{"x \xc0\xc0 ", "x \xc0\xc0"},
|
|
|
|
|
|
{"x \xc0", "x \xc0"},
|
|
|
|
|
|
{"x \xc0 ", "x \xc0"},
|
|
|
|
|
|
{"x \xc0\xc0 ", "x \xc0\xc0"},
|
|
|
|
|
|
{"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
|
|
|
|
|
|
{"x ☺ ", "x ☺"},
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2011-10-25 22:22:09 -07:00
|
|
|
|
// tenRunes returns a string made of ten copies of the rune ch.
func tenRunes(ch rune) string {
	var rs [10]rune
	for i := range rs {
		rs[i] = ch
	}
	return string(rs[:])
}
|
|
|
|
|
|
|
2009-11-15 12:07:27 -08:00
|
|
|
|
// rot13 is a user-defined self-inverse mapping function: it rotates ASCII
// letters by 13 places within their own case and returns every other rune
// unchanged.
func rot13(r rune) rune {
	const shift = 13
	switch {
	case 'a' <= r && r <= 'z':
		return 'a' + (r-'a'+shift)%26
	case 'A' <= r && r <= 'Z':
		return 'A' + (r-'A'+shift)%26
	}
	return r
}
|
|
|
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
|
func TestMap(t *testing.T) {
|
|
|
|
|
|
// Run a couple of awful growth/shrinkage tests
|
2009-12-15 15:40:16 -08:00
|
|
|
|
a := tenRunes('a')
|
2016-03-01 23:21:55 +00:00
|
|
|
|
// 1. Grow. This triggers two reallocations in Map.
|
2011-10-25 22:22:09 -07:00
|
|
|
|
maxRune := func(rune) rune { return unicode.MaxRune }
|
2009-12-15 15:40:16 -08:00
|
|
|
|
m := Map(maxRune, a)
|
|
|
|
|
|
expect := tenRunes(unicode.MaxRune)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
if m != expect {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Errorf("growing: expected %q got %q", expect, m)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
2009-11-15 12:07:27 -08:00
|
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
|
// 2. Shrink
|
2011-10-25 22:22:09 -07:00
|
|
|
|
minRune := func(rune) rune { return 'a' }
|
2009-12-15 15:40:16 -08:00
|
|
|
|
m = Map(minRune, tenRunes(unicode.MaxRune))
|
|
|
|
|
|
expect = a
|
2009-09-01 11:06:28 -07:00
|
|
|
|
if m != expect {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Errorf("shrinking: expected %q got %q", expect, m)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
2009-11-15 12:07:27 -08:00
|
|
|
|
|
|
|
|
|
|
// 3. Rot13
|
2009-12-15 15:40:16 -08:00
|
|
|
|
m = Map(rot13, "a to zed")
|
|
|
|
|
|
expect = "n gb mrq"
|
2009-11-15 12:07:27 -08:00
|
|
|
|
if m != expect {
|
|
|
|
|
|
t.Errorf("rot13: expected %q got %q", expect, m)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 4. Rot13^2
|
2009-12-15 15:40:16 -08:00
|
|
|
|
m = Map(rot13, Map(rot13, "a to zed"))
|
|
|
|
|
|
expect = "a to zed"
|
2009-11-15 12:07:27 -08:00
|
|
|
|
if m != expect {
|
|
|
|
|
|
t.Errorf("rot13: expected %q got %q", expect, m)
|
|
|
|
|
|
}
|
2009-12-11 10:37:48 -08:00
|
|
|
|
|
|
|
|
|
|
// 5. Drop
|
2011-10-25 22:22:09 -07:00
|
|
|
|
dropNotLatin := func(r rune) rune {
|
|
|
|
|
|
if unicode.Is(unicode.Latin, r) {
|
|
|
|
|
|
return r
|
2009-12-11 10:37:48 -08:00
|
|
|
|
}
|
2009-12-15 15:40:16 -08:00
|
|
|
|
return -1
|
|
|
|
|
|
}
|
|
|
|
|
|
m = Map(dropNotLatin, "Hello, 세계")
|
|
|
|
|
|
expect = "Hello"
|
2009-12-11 10:37:48 -08:00
|
|
|
|
if m != expect {
|
|
|
|
|
|
t.Errorf("drop: expected %q got %q", expect, m)
|
|
|
|
|
|
}
|
2011-03-28 09:41:57 -07:00
|
|
|
|
|
|
|
|
|
|
// 6. Identity
|
2011-10-25 22:22:09 -07:00
|
|
|
|
identity := func(r rune) rune {
|
|
|
|
|
|
return r
|
2011-03-28 09:41:57 -07:00
|
|
|
|
}
|
|
|
|
|
|
orig := "Input string that we expect not to be copied."
|
|
|
|
|
|
m = Map(identity, orig)
|
|
|
|
|
|
if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data !=
|
|
|
|
|
|
(*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
|
|
|
|
|
|
t.Error("unexpected copy during identity map")
|
|
|
|
|
|
}
|
2017-02-28 21:21:45 +01:00
|
|
|
|
|
|
|
|
|
|
// 7. Handle invalid UTF-8 sequence
|
|
|
|
|
|
replaceNotLatin := func(r rune) rune {
|
|
|
|
|
|
if unicode.Is(unicode.Latin, r) {
|
|
|
|
|
|
return r
|
|
|
|
|
|
}
|
|
|
|
|
|
return '?'
|
|
|
|
|
|
}
|
|
|
|
|
|
m = Map(replaceNotLatin, "Hello\255World")
|
|
|
|
|
|
expect = "Hello?World"
|
|
|
|
|
|
if m != expect {
|
|
|
|
|
|
t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
|
|
|
|
|
|
}
|
2018-05-04 06:54:18 +02:00
|
|
|
|
|
|
|
|
|
|
// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
|
|
|
|
|
|
encode := func(r rune) rune {
|
|
|
|
|
|
switch r {
|
|
|
|
|
|
case utf8.RuneSelf:
|
|
|
|
|
|
return unicode.MaxRune
|
|
|
|
|
|
case unicode.MaxRune:
|
|
|
|
|
|
return utf8.RuneSelf
|
|
|
|
|
|
}
|
|
|
|
|
|
return r
|
|
|
|
|
|
}
|
|
|
|
|
|
s := string(utf8.RuneSelf) + string(utf8.MaxRune)
|
|
|
|
|
|
r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
|
|
|
|
|
|
m = Map(encode, s)
|
|
|
|
|
|
if m != r {
|
|
|
|
|
|
t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
|
|
|
|
|
|
}
|
|
|
|
|
|
m = Map(encode, r)
|
|
|
|
|
|
if m != s {
|
|
|
|
|
|
t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
|
|
|
|
|
|
}
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-12-15 15:40:16 -08:00
|
|
|
|
// TestToUpper runs ToUpper over every case in upperTests.
func TestToUpper(t *testing.T) {
	runStringTests(t, ToUpper, "ToUpper", upperTests)
}
|
2009-06-05 13:09:03 -07:00
|
|
|
|
|
2009-12-15 15:40:16 -08:00
|
|
|
|
// TestToLower runs ToLower over every case in lowerTests.
func TestToLower(t *testing.T) {
	runStringTests(t, ToLower, "ToLower", lowerTests)
}
|
2009-06-05 13:09:03 -07:00
|
|
|
|
|
2017-09-28 15:07:48 +05:30
|
|
|
|
func BenchmarkToUpper(b *testing.B) {
|
|
|
|
|
|
for _, tc := range upperTests {
|
|
|
|
|
|
b.Run(tc.in, func(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
actual := ToUpper(tc.in)
|
|
|
|
|
|
if actual != tc.out {
|
|
|
|
|
|
b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2017-11-08 09:15:53 +05:30
|
|
|
|
func BenchmarkToLower(b *testing.B) {
|
|
|
|
|
|
for _, tc := range lowerTests {
|
|
|
|
|
|
b.Run(tc.in, func(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
actual := ToLower(tc.in)
|
|
|
|
|
|
if actual != tc.out {
|
|
|
|
|
|
b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-03-28 09:41:57 -07:00
|
|
|
|
// BenchmarkMapNoChanges measures Map with a mapping function that returns
// every rune unchanged.
func BenchmarkMapNoChanges(b *testing.B) {
	identity := func(r rune) rune { return r }
	for i := 0; i < b.N; i++ {
		Map(identity, "Some string that won't be modified.")
	}
}
|
|
|
|
|
|
|
2010-03-30 17:51:03 -07:00
|
|
|
|
// TestSpecialCase checks ToUpperSpecial and ToLowerSpecial with
// unicode.TurkishCase on a lower-case and an upper-case alphabet string: each
// function must leave a string already in its target case unchanged and must
// convert the other string into it.
func TestSpecialCase(t *testing.T) {
	lower := "abcçdefgğhıijklmnoöprsştuüvyz"
	upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
	checks := []struct {
		label     string // operation shown in the failure message
		got, want string
	}{
		{"Upper(upper)", ToUpperSpecial(unicode.TurkishCase, upper), upper},
		{"Upper(lower)", ToUpperSpecial(unicode.TurkishCase, lower), upper},
		{"Lower(lower)", ToLowerSpecial(unicode.TurkishCase, lower), lower},
		{"Lower(upper)", ToLowerSpecial(unicode.TurkishCase, upper), lower},
	}
	for _, c := range checks {
		if c.got != c.want {
			t.Errorf("%s is %s not %s", c.label, c.got, c.want)
		}
	}
}
|
|
|
|
|
|
|
2009-12-15 15:40:16 -08:00
|
|
|
|
// TestTrimSpace runs TrimSpace over every case in trimSpaceTests.
func TestTrimSpace(t *testing.T) {
	runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests)
}
|
2009-06-05 13:09:03 -07:00
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// trimTests is the table shared by TestTrim and BenchmarkTrim. Each entry
// names the trimming function to call (f), the input string (in), the cutset,
// prefix or suffix argument (arg), and the expected result (out).
var trimTests = []struct {
	f            string
	in, arg, out string
}{
	{"Trim", "abba", "a", "bb"},
	{"Trim", "abba", "ab", ""},
	{"TrimLeft", "abba", "ab", ""},
	{"TrimRight", "abba", "ab", ""},
	{"TrimLeft", "abba", "a", "bba"},
	{"TrimRight", "abba", "a", "abb"},
	{"Trim", "<tag>", "<>", "tag"},
	{"Trim", "* listitem", " *", "listitem"},
	{"Trim", `"quote"`, `"`, "quote"},
	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
	{"Trim", "\x80test\xff", "\xff", "test"},
	{"Trim", " Ġ ", " ", "Ġ"},
	{"Trim", " Ġİ0", "0 ", "Ġİ"},
	// empty string tests
	{"Trim", "abba", "", "abba"},
	{"Trim", "", "123", ""},
	{"Trim", "", "", ""},
	{"TrimLeft", "abba", "", "abba"},
	{"TrimLeft", "", "123", ""},
	{"TrimLeft", "", "", ""},
	{"TrimRight", "abba", "", "abba"},
	{"TrimRight", "", "123", ""},
	{"TrimRight", "", "", ""},
	{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
	{"TrimPrefix", "aabb", "a", "abb"},
	{"TrimPrefix", "aabb", "b", "aabb"},
	{"TrimSuffix", "aabb", "a", "aabb"},
	{"TrimSuffix", "aabb", "b", "aab"},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestTrim(t *testing.T) {
|
|
|
|
|
|
for _, tc := range trimTests {
|
2011-11-13 22:57:19 -05:00
|
|
|
|
name := tc.f
|
|
|
|
|
|
var f func(string, string) string
|
|
|
|
|
|
switch name {
|
|
|
|
|
|
case "Trim":
|
|
|
|
|
|
f = Trim
|
|
|
|
|
|
case "TrimLeft":
|
|
|
|
|
|
f = TrimLeft
|
|
|
|
|
|
case "TrimRight":
|
|
|
|
|
|
f = TrimRight
|
2013-02-01 08:41:25 -08:00
|
|
|
|
case "TrimPrefix":
|
|
|
|
|
|
f = TrimPrefix
|
|
|
|
|
|
case "TrimSuffix":
|
|
|
|
|
|
f = TrimSuffix
|
2010-04-09 18:57:03 -07:00
|
|
|
|
default:
|
2011-11-28 09:51:03 -08:00
|
|
|
|
t.Errorf("Undefined trim function %s", name)
|
2010-04-09 18:57:03 -07:00
|
|
|
|
}
|
2013-02-01 08:41:25 -08:00
|
|
|
|
actual := f(tc.in, tc.arg)
|
2010-04-09 18:57:03 -07:00
|
|
|
|
if actual != tc.out {
|
2013-02-01 08:41:25 -08:00
|
|
|
|
t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
|
2010-04-09 18:57:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2015-03-25 21:08:04 -04:00
|
|
|
|
func BenchmarkTrim(b *testing.B) {
|
|
|
|
|
|
b.ReportAllocs()
|
|
|
|
|
|
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
for _, tc := range trimTests {
|
|
|
|
|
|
name := tc.f
|
|
|
|
|
|
var f func(string, string) string
|
|
|
|
|
|
switch name {
|
|
|
|
|
|
case "Trim":
|
|
|
|
|
|
f = Trim
|
|
|
|
|
|
case "TrimLeft":
|
|
|
|
|
|
f = TrimLeft
|
|
|
|
|
|
case "TrimRight":
|
|
|
|
|
|
f = TrimRight
|
|
|
|
|
|
case "TrimPrefix":
|
|
|
|
|
|
f = TrimPrefix
|
|
|
|
|
|
case "TrimSuffix":
|
|
|
|
|
|
f = TrimSuffix
|
|
|
|
|
|
default:
|
|
|
|
|
|
b.Errorf("Undefined trim function %s", name)
|
|
|
|
|
|
}
|
|
|
|
|
|
actual := f(tc.in, tc.arg)
|
|
|
|
|
|
if actual != tc.out {
|
|
|
|
|
|
b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2010-07-23 12:34:35 -07:00
|
|
|
|
// predicate pairs a rune-classifying function with a human-readable name so
// that test failures can say which predicate was in use.
type predicate struct {
	f    func(rune) bool
	name string
}
|
|
|
|
|
|
|
2010-06-14 14:54:48 -07:00
|
|
|
|
var isSpace = predicate{unicode.IsSpace, "IsSpace"}
|
|
|
|
|
|
var isDigit = predicate{unicode.IsDigit, "IsDigit"}
|
|
|
|
|
|
var isUpper = predicate{unicode.IsUpper, "IsUpper"}
|
|
|
|
|
|
var isValidRune = predicate{
|
2011-10-25 22:22:09 -07:00
|
|
|
|
func(r rune) bool {
|
2010-06-14 14:54:48 -07:00
|
|
|
|
return r != utf8.RuneError
|
|
|
|
|
|
},
|
|
|
|
|
|
"IsValidRune",
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func not(p predicate) predicate {
|
|
|
|
|
|
return predicate{
|
2011-10-25 22:22:09 -07:00
|
|
|
|
func(r rune) bool {
|
2010-06-14 14:54:48 -07:00
|
|
|
|
return !p.f(r)
|
|
|
|
|
|
},
|
|
|
|
|
|
"not " + p.name,
|
|
|
|
|
|
}
|
2010-04-09 18:57:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
var trimFuncTests = []struct {
|
|
|
|
|
|
f predicate
|
|
|
|
|
|
in, out string
|
|
|
|
|
|
}{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{isSpace, space + " hello " + space, "hello"},
|
|
|
|
|
|
{isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
|
|
|
|
|
|
{isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
|
|
|
|
|
|
{not(isSpace), "hello" + space + "hello", space},
|
|
|
|
|
|
{not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
|
|
|
|
|
|
{isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
|
|
|
|
|
|
{not(isValidRune), "\xc0a\xc0", "a"},
|
2010-04-09 18:57:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestTrimFunc(t *testing.T) {
|
|
|
|
|
|
for _, tc := range trimFuncTests {
|
2010-06-14 14:54:48 -07:00
|
|
|
|
actual := TrimFunc(tc.in, tc.f.f)
|
2010-04-09 18:57:03 -07:00
|
|
|
|
if actual != tc.out {
|
2010-06-14 14:54:48 -07:00
|
|
|
|
t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
var indexFuncTests = []struct {
|
2010-06-14 14:54:48 -07:00
|
|
|
|
in string
|
|
|
|
|
|
f predicate
|
|
|
|
|
|
first, last int
|
2011-09-26 18:32:51 -04:00
|
|
|
|
}{
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"", isValidRune, -1, -1},
|
|
|
|
|
|
{"abc", isDigit, -1, -1},
|
|
|
|
|
|
{"0123", isDigit, 0, 3},
|
|
|
|
|
|
{"a1b", isDigit, 1, 1},
|
|
|
|
|
|
{space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
|
|
|
|
|
|
{"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
|
|
|
|
|
|
{"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
|
|
|
|
|
|
{"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
|
2010-06-14 14:54:48 -07:00
|
|
|
|
|
2010-07-23 12:34:35 -07:00
|
|
|
|
// tests of invalid UTF-8
|
2010-10-22 10:06:33 -07:00
|
|
|
|
{"\x801", isDigit, 1, 1},
|
|
|
|
|
|
{"\x80abc", isDigit, -1, -1},
|
|
|
|
|
|
{"\xc0a\xc0", isValidRune, 1, 1},
|
|
|
|
|
|
{"\xc0a\xc0", not(isValidRune), 0, 2},
|
|
|
|
|
|
{"\xc0☺\xc0", not(isValidRune), 0, 4},
|
|
|
|
|
|
{"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
|
|
|
|
|
|
{"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
|
|
|
|
|
|
{"a\xe0\x80cd", not(isValidRune), 1, 2},
|
|
|
|
|
|
{"\x80\x80\x80\x80", not(isValidRune), 0, 3},
|
2010-06-14 14:54:48 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestIndexFunc(t *testing.T) {
|
|
|
|
|
|
for _, tc := range indexFuncTests {
|
|
|
|
|
|
first := IndexFunc(tc.in, tc.f.f)
|
|
|
|
|
|
if first != tc.first {
|
|
|
|
|
|
t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
|
|
|
|
|
|
}
|
|
|
|
|
|
last := LastIndexFunc(tc.in, tc.f.f)
|
|
|
|
|
|
if last != tc.last {
|
|
|
|
|
|
t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
|
2010-04-09 18:57:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
|
// equal reports whether s1 and s2 are identical. When they differ it splits
// both into single-character strings, compares them position by position up
// to the shorter length, and reports every differing rune through t.Errorf,
// prefixed with the message m. A pure length difference produces no Errorf
// call; the caller is expected to report the failure from the false result.
func equal(m string, s1, s2 string, t *testing.T) bool {
	if s1 == s2 {
		return true
	}
	e1 := Split(s1, "")
	e2 := Split(s2, "")
	for i, c1 := range e1 {
		if i >= len(e2) {
			break
		}
		r1, _ := utf8.DecodeRuneInString(c1)
		r2, _ := utf8.DecodeRuneInString(e2[i])
		if r1 != r2 {
			t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
		}
	}
	return false
}
|
|
|
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
|
func TestCaseConsistency(t *testing.T) {
|
|
|
|
|
|
// Make a string of all the runes.
|
2011-12-08 22:08:03 -05:00
|
|
|
|
numRunes := int(unicode.MaxRune + 1)
|
2011-03-25 16:31:10 -07:00
|
|
|
|
if testing.Short() {
|
|
|
|
|
|
numRunes = 1000
|
|
|
|
|
|
}
|
2011-10-25 22:22:09 -07:00
|
|
|
|
a := make([]rune, numRunes)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
for i := range a {
|
2011-10-25 22:22:09 -07:00
|
|
|
|
a[i] = rune(i)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
2009-12-15 15:40:16 -08:00
|
|
|
|
s := string(a)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
// convert the cases.
|
2009-12-15 15:40:16 -08:00
|
|
|
|
upper := ToUpper(s)
|
|
|
|
|
|
lower := ToLower(s)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
|
|
|
|
|
|
// Consistency checks
|
2011-03-25 16:31:10 -07:00
|
|
|
|
if n := utf8.RuneCountInString(upper); n != numRunes {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Error("rune count wrong in upper:", n)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
2011-03-25 16:31:10 -07:00
|
|
|
|
if n := utf8.RuneCountInString(lower); n != numRunes {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Error("rune count wrong in lower:", n)
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
|
|
|
|
|
if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Error("ToUpper(upper) consistency fail")
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
|
|
|
|
|
if !equal("ToLower(lower)", ToLower(lower), lower, t) {
|
2009-11-09 12:07:39 -08:00
|
|
|
|
t.Error("ToLower(lower) consistency fail")
|
2009-09-01 11:06:28 -07:00
|
|
|
|
}
|
2009-11-05 15:12:37 -08:00
|
|
|
|
/*
|
|
|
|
|
|
These fail because of non-one-to-oneness of the data, such as multiple
|
|
|
|
|
|
upper case 'I' mapping to 'i'. We comment them out but keep them for
|
|
|
|
|
|
interest.
|
|
|
|
|
|
For instance: CAPITAL LETTER I WITH DOT ABOVE:
|
|
|
|
|
|
unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
|
|
|
|
|
|
|
|
|
|
|
|
if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
|
|
|
|
|
|
t.Error("ToUpper(lower) consistency fail");
|
|
|
|
|
|
}
|
|
|
|
|
|
if !equal("ToLower(upper)", ToLower(upper), lower, t) {
|
|
|
|
|
|
t.Error("ToLower(upper) consistency fail");
|
|
|
|
|
|
}
|
2009-09-01 11:06:28 -07:00
|
|
|
|
*/
|
|
|
|
|
|
}
|
2009-11-16 12:40:01 -08:00
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// RepeatTests is the table for TestRepeat: the string to repeat (in), the
// repetition count, and the expected result (out).
var RepeatTests = []struct {
	in, out string
	count   int
}{
	{"", "", 0},
	{"", "", 1},
	{"", "", 2},
	{"-", "", 0},
	{"-", "-", 1},
	{"-", "----------", 10},
	{"abc ", "abc abc abc ", 3},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestRepeat(t *testing.T) {
|
|
|
|
|
|
for _, tt := range RepeatTests {
|
2009-12-15 15:40:16 -08:00
|
|
|
|
a := Repeat(tt.in, tt.count)
|
2009-11-16 12:40:01 -08:00
|
|
|
|
if !equal("Repeat(s)", a, tt.out, t) {
|
2009-12-15 15:40:16 -08:00
|
|
|
|
t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
|
|
|
|
|
|
continue
|
2009-11-16 12:40:01 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2009-12-02 20:47:38 -08:00
|
|
|
|
|
2016-09-28 01:54:38 -07:00
|
|
|
|
// repeat calls Repeat(s, count) and converts a panic into an error. It
// returns nil when Repeat completes normally. A panic value that is already
// an error is returned as is; any other value is formatted into a new error.
func repeat(s string, count int) (err error) {
	defer func() {
		if r := recover(); r != nil {
			switch v := r.(type) {
			case error:
				err = v
			default:
				// %v formats any panic value; %s would print %!s(...) noise
				// for values that are not strings.
				err = fmt.Errorf("%v", v)
			}
		}
	}()

	Repeat(s, count)

	return
}
|
|
|
|
|
|
|
|
|
|
|
|
// See Issue golang.org/issue/16237
|
|
|
|
|
|
func TestRepeatCatchesOverflow(t *testing.T) {
|
|
|
|
|
|
tests := [...]struct {
|
|
|
|
|
|
s string
|
|
|
|
|
|
count int
|
|
|
|
|
|
errStr string
|
|
|
|
|
|
}{
|
|
|
|
|
|
0: {"--", -2147483647, "negative"},
|
|
|
|
|
|
1: {"", int(^uint(0) >> 1), ""},
|
|
|
|
|
|
2: {"-", 10, ""},
|
|
|
|
|
|
3: {"gopher", 0, ""},
|
|
|
|
|
|
4: {"-", -1, "negative"},
|
|
|
|
|
|
5: {"--", -102, "negative"},
|
|
|
|
|
|
6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for i, tt := range tests {
|
|
|
|
|
|
err := repeat(tt.s, tt.count)
|
|
|
|
|
|
if tt.errStr == "" {
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Errorf("#%d panicked %v", i, err)
|
|
|
|
|
|
}
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if err == nil || !Contains(err.Error(), tt.errStr) {
|
|
|
|
|
|
t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-10-25 22:22:09 -07:00
|
|
|
|
// runesEqual reports whether a and b have the same length and hold the same
// runes in the same order. A nil slice and an empty slice compare equal.
func runesEqual(a, b []rune) bool {
	if len(a) != len(b) {
		return false
	}
	for i, r := range a {
		if r != b[i] {
			return false
		}
	}
	return true
}
|
|
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// RunesTests is the table for TestRunes: an input string, the runes expected
// from converting it to []rune, and whether that conversion is lossy (the
// input holds invalid UTF-8, so the original bytes cannot be rebuilt).
var RunesTests = []struct {
	in    string
	out   []rune
	lossy bool
}{
	{"", []rune{}, false},
	{" ", []rune{32}, false},
	{"ABC", []rune{65, 66, 67}, false},
	{"abc", []rune{97, 98, 99}, false},
	{"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
	{"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
	{"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestRunes(t *testing.T) {
|
|
|
|
|
|
for _, tt := range RunesTests {
|
2011-10-25 22:22:09 -07:00
|
|
|
|
a := []rune(tt.in)
|
2009-12-02 20:47:38 -08:00
|
|
|
|
if !runesEqual(a, tt.out) {
|
2011-10-25 22:22:09 -07:00
|
|
|
|
t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out)
|
2009-12-15 15:40:16 -08:00
|
|
|
|
continue
|
2009-12-02 20:47:38 -08:00
|
|
|
|
}
|
|
|
|
|
|
if !tt.lossy {
|
|
|
|
|
|
// can only test reassembly if we didn't lose information
|
2009-12-15 15:40:16 -08:00
|
|
|
|
s := string(a)
|
2009-12-02 20:47:38 -08:00
|
|
|
|
if s != tt.in {
|
2011-10-25 22:22:09 -07:00
|
|
|
|
t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in)
|
2009-12-02 20:47:38 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2010-04-20 22:18:26 -07:00
|
|
|
|
|
2011-05-26 11:02:07 -07:00
|
|
|
|
func TestReadByte(t *testing.T) {
|
|
|
|
|
|
testStrings := []string{"", abcd, faces, commas}
|
|
|
|
|
|
for _, s := range testStrings {
|
|
|
|
|
|
reader := NewReader(s)
|
|
|
|
|
|
if e := reader.UnreadByte(); e == nil {
|
|
|
|
|
|
t.Errorf("Unreading %q at beginning: expected error", s)
|
|
|
|
|
|
}
|
|
|
|
|
|
var res bytes.Buffer
|
|
|
|
|
|
for {
|
|
|
|
|
|
b, e := reader.ReadByte()
|
2011-11-01 22:05:34 -04:00
|
|
|
|
if e == io.EOF {
|
2011-05-26 11:02:07 -07:00
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
if e != nil {
|
|
|
|
|
|
t.Errorf("Reading %q: %s", s, e)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
res.WriteByte(b)
|
|
|
|
|
|
// unread and read again
|
|
|
|
|
|
e = reader.UnreadByte()
|
|
|
|
|
|
if e != nil {
|
|
|
|
|
|
t.Errorf("Unreading %q: %s", s, e)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
b1, e := reader.ReadByte()
|
|
|
|
|
|
if e != nil {
|
|
|
|
|
|
t.Errorf("Reading %q after unreading: %s", s, e)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
if b1 != b {
|
|
|
|
|
|
t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if res.String() != s {
|
|
|
|
|
|
t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String())
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2010-04-20 22:18:26 -07:00
|
|
|
|
func TestReadRune(t *testing.T) {
|
|
|
|
|
|
testStrings := []string{"", abcd, faces, commas}
|
|
|
|
|
|
for _, s := range testStrings {
|
|
|
|
|
|
reader := NewReader(s)
|
2011-05-26 11:02:07 -07:00
|
|
|
|
if e := reader.UnreadRune(); e == nil {
|
|
|
|
|
|
t.Errorf("Unreading %q at beginning: expected error", s)
|
|
|
|
|
|
}
|
2010-04-20 22:18:26 -07:00
|
|
|
|
res := ""
|
|
|
|
|
|
for {
|
2011-05-26 11:02:07 -07:00
|
|
|
|
r, z, e := reader.ReadRune()
|
2011-11-01 22:05:34 -04:00
|
|
|
|
if e == io.EOF {
|
2010-04-20 22:18:26 -07:00
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
if e != nil {
|
|
|
|
|
|
t.Errorf("Reading %q: %s", s, e)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
res += string(r)
|
2011-05-26 11:02:07 -07:00
|
|
|
|
// unread and read again
|
|
|
|
|
|
e = reader.UnreadRune()
|
|
|
|
|
|
if e != nil {
|
|
|
|
|
|
t.Errorf("Unreading %q: %s", s, e)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
r1, z1, e := reader.ReadRune()
|
|
|
|
|
|
if e != nil {
|
|
|
|
|
|
t.Errorf("Reading %q after unreading: %s", s, e)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
if r1 != r {
|
|
|
|
|
|
t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
if z1 != z {
|
|
|
|
|
|
t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1)
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
2010-04-20 22:18:26 -07:00
|
|
|
|
}
|
|
|
|
|
|
if res != s {
|
|
|
|
|
|
t.Errorf("Reader(%q).ReadRune() produced %q", s, res)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2010-06-30 18:03:09 -07:00
|
|
|
|
|
2014-03-19 09:00:58 -07:00
|
|
|
|
// UnreadRuneErrorTests lists Reader operations after which TestUnreadRuneError
// expects UnreadRune to fail. Each entry has a display name and a function
// that performs the operation on the given Reader.
var UnreadRuneErrorTests = []struct {
	name string
	f    func(*Reader)
}{
	{"Read", func(r *Reader) { r.Read([]byte{0}) }},
	{"ReadByte", func(r *Reader) { r.ReadByte() }},
	{"UnreadRune", func(r *Reader) { r.UnreadRune() }},
	{"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
	{"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestUnreadRuneError(t *testing.T) {
|
|
|
|
|
|
for _, tt := range UnreadRuneErrorTests {
|
|
|
|
|
|
reader := NewReader("0123456789")
|
|
|
|
|
|
if _, _, err := reader.ReadRune(); err != nil {
|
|
|
|
|
|
// should not happen
|
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
|
}
|
|
|
|
|
|
tt.f(reader)
|
|
|
|
|
|
err := reader.UnreadRune()
|
|
|
|
|
|
if err == nil {
|
|
|
|
|
|
t.Errorf("Unreading after %s: expected error", tt.name)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// ReplaceTests is the table for TestReplace: the input string, the old and
// new substrings, the replacement limit n passed to Replace, and the expected
// output.
var ReplaceTests = []struct {
	in       string
	old, new string
	n        int
	out      string
}{
	{"hello", "l", "L", 0, "hello"},
	{"hello", "l", "L", -1, "heLLo"},
	{"hello", "x", "X", -1, "hello"},
	{"", "x", "X", -1, ""},
	{"radar", "r", "<r>", -1, "<r>ada<r>"},
	{"", "", "<>", -1, "<>"},
	{"banana", "a", "<>", -1, "b<>n<>n<>"},
	{"banana", "a", "<>", 1, "b<>nana"},
	{"banana", "a", "<>", 1000, "b<>n<>n<>"},
	{"banana", "an", "<>", -1, "b<><>a"},
	{"banana", "ana", "<>", -1, "b<>na"},
	{"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
	{"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
	{"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
	{"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
	{"banana", "", "<>", 1, "<>banana"},
	{"banana", "a", "a", -1, "banana"},
	{"banana", "a", "a", 1, "banana"},
	{"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestReplace(t *testing.T) {
|
|
|
|
|
|
for _, tt := range ReplaceTests {
|
|
|
|
|
|
if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out {
|
|
|
|
|
|
t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2010-07-20 00:03:59 -07:00
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// TitleTests is the table for TestTitle: an input string and the result
// expected from Title.
var TitleTests = []struct {
	in, out string
}{
	{"", ""},
	{"a", "A"},
	{" aaa aaa aaa ", " Aaa Aaa Aaa "},
	{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
	{"123a456", "123a456"},
	{"double-blind", "Double-Blind"},
	{"ÿøû", "Ÿøû"},
	{"with_underscore", "With_underscore"},
	{"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestTitle(t *testing.T) {
|
|
|
|
|
|
for _, tt := range TitleTests {
|
|
|
|
|
|
if s := Title(tt.in); s != tt.out {
|
|
|
|
|
|
t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2010-11-01 14:32:48 -07:00
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// ContainsTests is the table for TestContains: the string to search, the
// substring to look for, and whether Contains is expected to report a match.
// Most entries vary the needle length and its position in the haystack,
// including haystacks sliced so that the needle is cut off at either end.
var ContainsTests = []struct {
	str, substr string
	expected    bool
}{
	{"abc", "bc", true},
	{"abc", "bcd", false},
	{"abc", "", true},
	{"", "a", false},

	// cases to cover code in runtime/asm_amd64.s:indexShortStr
	// 2-byte needle
	{"xxxxxx", "01", false},
	{"01xxxx", "01", true},
	{"xx01xx", "01", true},
	{"xxxx01", "01", true},
	{"01xxxxx"[1:], "01", false},
	{"xxxxx01"[:6], "01", false},
	// 3-byte needle
	{"xxxxxxx", "012", false},
	{"012xxxx", "012", true},
	{"xx012xx", "012", true},
	{"xxxx012", "012", true},
	{"012xxxxx"[1:], "012", false},
	{"xxxxx012"[:7], "012", false},
	// 4-byte needle
	{"xxxxxxxx", "0123", false},
	{"0123xxxx", "0123", true},
	{"xx0123xx", "0123", true},
	{"xxxx0123", "0123", true},
	{"0123xxxxx"[1:], "0123", false},
	{"xxxxx0123"[:8], "0123", false},
	// 5-7-byte needle
	{"xxxxxxxxx", "01234", false},
	{"01234xxxx", "01234", true},
	{"xx01234xx", "01234", true},
	{"xxxx01234", "01234", true},
	{"01234xxxxx"[1:], "01234", false},
	{"xxxxx01234"[:9], "01234", false},
	// 8-byte needle
	{"xxxxxxxxxxxx", "01234567", false},
	{"01234567xxxx", "01234567", true},
	{"xx01234567xx", "01234567", true},
	{"xxxx01234567", "01234567", true},
	{"01234567xxxxx"[1:], "01234567", false},
	{"xxxxx01234567"[:12], "01234567", false},
	// 9-15-byte needle
	{"xxxxxxxxxxxxx", "012345678", false},
	{"012345678xxxx", "012345678", true},
	{"xx012345678xx", "012345678", true},
	{"xxxx012345678", "012345678", true},
	{"012345678xxxxx"[1:], "012345678", false},
	{"xxxxx012345678"[:13], "012345678", false},
	// 16-byte needle
	{"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
	{"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
	{"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
	{"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
	{"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
	{"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
	// 17-31-byte needle
	{"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
	{"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
	{"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
	{"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
	{"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
	{"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},

	// partial match cases
	{"xx01x", "012", false},                             // 3
	{"xx0123x", "01234", false},                         // 5-7
	{"xx01234567x", "012345678", false},                 // 9-15
	{"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestContains(t *testing.T) {
|
|
|
|
|
|
for _, ct := range ContainsTests {
|
|
|
|
|
|
if Contains(ct.str, ct.substr) != ct.expected {
|
|
|
|
|
|
t.Errorf("Contains(%s, %s) = %v, want %v",
|
|
|
|
|
|
ct.str, ct.substr, !ct.expected, ct.expected)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2011-09-26 18:32:51 -04:00
|
|
|
|
|
2011-11-23 20:20:14 -08:00
|
|
|
|
var ContainsAnyTests = []struct {
|
|
|
|
|
|
str, substr string
|
|
|
|
|
|
expected bool
|
|
|
|
|
|
}{
|
|
|
|
|
|
{"", "", false},
|
|
|
|
|
|
{"", "a", false},
|
|
|
|
|
|
{"", "abc", false},
|
|
|
|
|
|
{"a", "", false},
|
|
|
|
|
|
{"a", "a", true},
|
|
|
|
|
|
{"aaa", "a", true},
|
|
|
|
|
|
{"abc", "xyz", false},
|
|
|
|
|
|
{"abc", "xcz", true},
|
|
|
|
|
|
{"a☺b☻c☹d", "uvw☻xyz", true},
|
|
|
|
|
|
{"aRegExp*", ".(|)*+?^$[]", true},
|
|
|
|
|
|
{dots + dots + dots, " ", false},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestContainsAny(t *testing.T) {
|
|
|
|
|
|
for _, ct := range ContainsAnyTests {
|
|
|
|
|
|
if ContainsAny(ct.str, ct.substr) != ct.expected {
|
|
|
|
|
|
t.Errorf("ContainsAny(%s, %s) = %v, want %v",
|
|
|
|
|
|
ct.str, ct.substr, !ct.expected, ct.expected)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ContainsRuneTests is the table for TestContainsRune: the string to search,
// the rune to look for, and whether ContainsRune is expected to report a
// match.
var ContainsRuneTests = []struct {
	str      string
	r        rune
	expected bool
}{
	{"", 'a', false},
	{"a", 'a', true},
	{"aaa", 'a', true},
	{"abc", 'y', false},
	{"abc", 'c', true},
	{"a☺b☻c☹d", 'x', false},
	{"a☺b☻c☹d", '☻', true},
	{"aRegExp*", '*', true},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestContainsRune(t *testing.T) {
|
|
|
|
|
|
for _, ct := range ContainsRuneTests {
|
|
|
|
|
|
if ContainsRune(ct.str, ct.r) != ct.expected {
|
2013-02-28 11:33:08 -08:00
|
|
|
|
t.Errorf("ContainsRune(%q, %q) = %v, want %v",
|
2011-11-23 20:20:14 -08:00
|
|
|
|
ct.str, ct.r, !ct.expected, ct.expected)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2011-09-26 18:32:51 -04:00
|
|
|
|
// EqualFoldTests is the table shared by TestEqualFold and BenchmarkEqualFold:
// two strings and whether EqualFold is expected to treat them as equal.
var EqualFoldTests = []struct {
	s   string
	t   string
	out bool
}{
	{s: "abc", t: "abc", out: true},
	{s: "ABcd", t: "ABcd", out: true},
	{s: "123abc", t: "123ABC", out: true},
	{s: "αβδ", t: "ΑΒΔ", out: true},
	{s: "abc", t: "xyz", out: false},
	{s: "abc", t: "XYZ", out: false},
	{s: "abcdefghijk", t: "abcdefghijX", out: false},
	// U+212A is expected to fold together with both 'k' and 'K'.
	{s: "abcdefghijk", t: "abcdefghij\u212A", out: true},
	{s: "abcdefghijK", t: "abcdefghij\u212A", out: true},
	{s: "abcdefghijkz", t: "abcdefghij\u212Ay", out: false},
	{s: "abcdefghijKz", t: "abcdefghij\u212Ay", out: false},
	{s: "1", t: "2", out: false},
	{s: "utf-8", t: "US-ASCII", out: false},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestEqualFold(t *testing.T) {
|
|
|
|
|
|
for _, tt := range EqualFoldTests {
|
|
|
|
|
|
if out := EqualFold(tt.s, tt.t); out != tt.out {
|
|
|
|
|
|
t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
if out := EqualFold(tt.t, tt.s); out != tt.out {
|
|
|
|
|
|
t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2012-09-18 15:02:08 -04:00
|
|
|
|
|
2018-04-29 00:15:03 -04:00
|
|
|
|
func BenchmarkEqualFold(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
for _, tt := range EqualFoldTests {
|
|
|
|
|
|
if out := EqualFold(tt.s, tt.t); out != tt.out {
|
|
|
|
|
|
b.Fatal("wrong result")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2013-08-09 12:51:21 -07:00
|
|
|
|
// CountTests is the table driving TestCount: the string to search, the
// separator to count, and the expected number of occurrences.
var CountTests = []struct {
	s   string
	sep string
	num int
}{
	// Empty string and/or empty separator.
	{s: "", sep: "", num: 1},
	{s: "", sep: "notempty", num: 0},
	{s: "notempty", sep: "", num: 9},
	// Separator longer than the string.
	{s: "smaller", sep: "not smaller", num: 0},
	// Single-byte separators.
	{s: "12345678987654321", sep: "6", num: 2},
	{s: "611161116", sep: "6", num: 3},
	// Multi-byte separators.
	{s: "notequal", sep: "NotEqual", num: 0},
	{s: "equal", sep: "equal", num: 1},
	{s: "abc1231231123q", sep: "123", num: 3},
	{s: "11111", sep: "11", num: 2},
}
|
|
|
|
|
|
|
|
|
|
|
|
func TestCount(t *testing.T) {
|
|
|
|
|
|
for _, tt := range CountTests {
|
|
|
|
|
|
if num := Count(tt.s, tt.sep); num != tt.num {
|
|
|
|
|
|
t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, num, tt.num)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2013-02-17 13:07:17 +01:00
|
|
|
|
// makeBenchInputHard builds a string just under 1 MiB long by concatenating
// randomly chosen tokens (HTML-like tags and two words). It stops as soon as
// the next token would bring the length to 1<<20 or more.
func makeBenchInputHard() string {
	const limit = 1 << 20
	tokens := [...]string{
		"<a>", "<p>", "<b>", "<strong>",
		"</a>", "</p>", "</b>", "</strong>",
		"hello", "world",
	}
	buf := make([]byte, 0, limit)
	for {
		tok := tokens[rand.Intn(len(tokens))]
		if len(buf)+len(tok) >= limit {
			return string(buf)
		}
		buf = append(buf, tok...)
	}
}

// benchInputHard is the shared input of the "Hard" benchmarks, generated
// once at package initialization.
var benchInputHard = makeBenchInputHard()
|
|
|
|
|
|
|
|
|
|
|
|
func benchmarkIndexHard(b *testing.B, sep string) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Index(benchInputHard, sep)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2014-09-01 17:47:57 +10:00
|
|
|
|
func benchmarkLastIndexHard(b *testing.B, sep string) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
LastIndex(benchInputHard, sep)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2013-02-17 13:07:17 +01:00
|
|
|
|
func benchmarkCountHard(b *testing.B, sep string) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Count(benchInputHard, sep)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
|
|
|
|
|
|
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
|
|
|
|
|
|
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
|
2016-04-28 17:39:55 +03:00
|
|
|
|
func BenchmarkIndexHard4(b *testing.B) {
|
|
|
|
|
|
benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
|
|
|
|
|
|
}
|
2013-02-17 13:07:17 +01:00
|
|
|
|
|
2014-09-01 17:47:57 +10:00
|
|
|
|
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
|
|
|
|
|
|
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
|
|
|
|
|
|
func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }
|
|
|
|
|
|
|
2013-02-17 13:07:17 +01:00
|
|
|
|
func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
|
|
|
|
|
|
func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
|
|
|
|
|
|
func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
|
|
|
|
|
|
|
|
|
|
|
|
// Inputs for the "Torture" benchmarks.
var (
	// benchInputTorture is two runs of 1024 "ABC" repetitions separated by "123".
	benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)

	// benchNeedleTorture is 1025 "ABC" repetitions, one more than either run
	// in benchInputTorture.
	benchNeedleTorture = Repeat("ABC", 1<<10+1)
)
|
|
|
|
|
|
|
|
|
|
|
|
func BenchmarkIndexTorture(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Index(benchInputTorture, benchNeedleTorture)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func BenchmarkCountTorture(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Count(benchInputTorture, benchNeedleTorture)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2013-02-19 10:36:15 -05:00
|
|
|
|
// BenchmarkCountTortureOverlapping times Count on a haystack of 1<<20 "ABC"
// repetitions with a needle of 1<<10 "ABC" repetitions, so the needle matches
// at every multiple of three bytes until the tail gets too short.
func BenchmarkCountTortureOverlapping(b *testing.B) {
	haystack := Repeat("ABC", 1<<20)
	needle := Repeat("ABC", 1<<10)
	// Building the 3 MiB haystack is setup, not the operation under test.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Count(haystack, needle)
	}
}
|
|
|
|
|
|
|
2017-03-27 13:22:59 +02:00
|
|
|
|
func BenchmarkCountByte(b *testing.B) {
|
|
|
|
|
|
indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
|
|
|
|
|
|
benchStr := Repeat(benchmarkString,
|
|
|
|
|
|
(indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
|
|
|
|
|
|
benchFunc := func(b *testing.B, benchStr string) {
|
|
|
|
|
|
b.SetBytes(int64(len(benchStr)))
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Count(benchStr, "=")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
for _, size := range indexSizes {
|
|
|
|
|
|
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
|
|
|
|
|
|
benchFunc(b, benchStr[:size])
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-09-18 15:02:08 -04:00
|
|
|
|
// makeFieldsInput returns a 1 MiB string of random content for the Fields
// benchmarks.
var makeFieldsInput = func() string {
	buf := make([]byte, 1<<20)
	// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
	for i := range buf {
		roll := rand.Intn(10)
		if roll == 0 {
			buf[i] = ' '
			continue
		}
		if roll == 1 && i > 0 && buf[i-1] == 'x' {
			// Overwrite the previous 'x' and this byte with the two-byte "χ".
			copy(buf[i-1:], "χ")
			continue
		}
		buf[i] = 'x'
	}
	return string(buf)
}
|
|
|
|
|
|
|
2017-03-06 09:34:39 +01:00
|
|
|
|
// makeFieldsInputASCII returns a 1 MiB ASCII-only string of random content
// for the Fields benchmarks.
var makeFieldsInputASCII = func() string {
	buf := make([]byte, 1<<20)
	// Input is ~10% space, rest ASCII non-space.
	for i := range buf {
		buf[i] = 'x'
		if rand.Intn(10) == 0 {
			buf[i] = ' '
		}
	}
	return string(buf)
}
|
|
|
|
|
|
|
|
|
|
|
|
var stringdata = []struct{ name, data string }{
|
|
|
|
|
|
{"ASCII", makeFieldsInputASCII()},
|
|
|
|
|
|
{"Mixed", makeFieldsInput()},
|
|
|
|
|
|
}
|
2012-09-18 15:02:08 -04:00
|
|
|
|
|
|
|
|
|
|
func BenchmarkFields(b *testing.B) {
|
2017-03-06 09:34:39 +01:00
|
|
|
|
for _, sd := range stringdata {
|
|
|
|
|
|
b.Run(sd.name, func(b *testing.B) {
|
|
|
|
|
|
for j := 1 << 4; j <= 1<<20; j <<= 4 {
|
|
|
|
|
|
b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
|
|
|
|
|
|
b.ReportAllocs()
|
|
|
|
|
|
b.SetBytes(int64(j))
|
|
|
|
|
|
data := sd.data[:j]
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Fields(data)
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
2012-09-18 15:02:08 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func BenchmarkFieldsFunc(b *testing.B) {
|
2017-03-06 09:34:39 +01:00
|
|
|
|
for _, sd := range stringdata {
|
|
|
|
|
|
b.Run(sd.name, func(b *testing.B) {
|
|
|
|
|
|
for j := 1 << 4; j <= 1<<20; j <<= 4 {
|
|
|
|
|
|
b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
|
|
|
|
|
|
b.ReportAllocs()
|
|
|
|
|
|
b.SetBytes(int64(j))
|
|
|
|
|
|
data := sd.data[:j]
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
FieldsFunc(data, unicode.IsSpace)
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
2012-09-18 15:02:08 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2013-03-06 15:21:19 -05:00
|
|
|
|
|
2017-02-07 13:38:52 +02:00
|
|
|
|
func BenchmarkSplitEmptySeparator(b *testing.B) {
|
2013-03-06 15:21:19 -05:00
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Split(benchInputHard, "")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2017-02-07 13:38:52 +02:00
|
|
|
|
func BenchmarkSplitSingleByteSeparator(b *testing.B) {
|
2013-03-06 15:21:19 -05:00
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Split(benchInputHard, "/")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2017-02-07 13:38:52 +02:00
|
|
|
|
func BenchmarkSplitMultiByteSeparator(b *testing.B) {
|
2013-03-06 15:21:19 -05:00
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
Split(benchInputHard, "hello")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2014-06-11 19:03:59 -07:00
|
|
|
|
|
2017-02-07 13:38:52 +02:00
|
|
|
|
func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
SplitN(benchInputHard, "/", 10)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
|
SplitN(benchInputHard, "hello", 10)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2014-06-11 19:03:59 -07:00
|
|
|
|
// BenchmarkRepeat times Repeat building an 80-character line of dashes.
func BenchmarkRepeat(b *testing.B) {
	for n := b.N; n > 0; n-- {
		Repeat("-", 80)
	}
}
|
bytes, strings: optimize for ASCII sets
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight
An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.
Uses of ContainsAny|IndexAny|LastIndexAny:
6% are 1 character (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.
Uses of Trim|TrimLeft|TrimRight:
71% are 1 character (e.g., "\n" or " ")
14% are 2 characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.
The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.
== bytes package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75%
BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69%
BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88%
BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91%
BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71%
BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56%
BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67%
BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21%
BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15%
BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79%
BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41%
BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82%
BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55%
BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24%
BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31%
BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08%
BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92%
BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99%
BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68%
BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27%
BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94%
BenchmarkTrimASCII/1:2-4 79.0 105 +32.91%
BenchmarkTrimASCII/1:4-4 79.6 109 +36.93%
BenchmarkTrimASCII/1:8-4 78.8 118 +49.75%
BenchmarkTrimASCII/1:16-4 80.2 132 +64.59%
BenchmarkTrimASCII/16:1-4 243 116 -52.26%
BenchmarkTrimASCII/16:2-4 243 171 -29.63%
BenchmarkTrimASCII/16:4-4 243 176 -27.57%
BenchmarkTrimASCII/16:8-4 241 184 -23.65%
BenchmarkTrimASCII/16:16-4 238 199 -16.39%
BenchmarkTrimASCII/256:1-4 2580 840 -67.44%
BenchmarkTrimASCII/256:2-4 2603 1175 -54.86%
BenchmarkTrimASCII/256:4-4 2572 1188 -53.81%
BenchmarkTrimASCII/256:8-4 2550 1191 -53.29%
BenchmarkTrimASCII/256:16-4 2585 1208 -53.27%
BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37%
BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86%
BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66%
BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88%
BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21%
== strings package ==
benchmark old ns/op new ns/op delta
BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33%
BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57%
BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22%
BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85%
BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15%
BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54%
BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24%
BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10%
BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03%
BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48%
BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74%
BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08%
BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37%
BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44%
BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21%
BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95%
BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19%
BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25%
BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37%
BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26%
BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14%
BenchmarkTrimASCII/1:2-4 73.3 103 +40.52%
BenchmarkTrimASCII/1:4-4 71.8 106 +47.63%
BenchmarkTrimASCII/1:8-4 71.2 113 +58.71%
BenchmarkTrimASCII/1:16-4 71.6 128 +78.77%
BenchmarkTrimASCII/16:1-4 152 116 -23.68%
BenchmarkTrimASCII/16:2-4 160 168 +5.00%
BenchmarkTrimASCII/16:4-4 172 170 -1.16%
BenchmarkTrimASCII/16:8-4 200 177 -11.50%
BenchmarkTrimASCII/16:16-4 254 193 -24.02%
BenchmarkTrimASCII/256:1-4 1438 864 -39.92%
BenchmarkTrimASCII/256:2-4 1551 1195 -22.95%
BenchmarkTrimASCII/256:4-4 1770 1200 -32.20%
BenchmarkTrimASCII/256:8-4 2195 1216 -44.60%
BenchmarkTrimASCII/256:16-4 3054 1224 -59.92%
BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20%
BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77%
BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90%
BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81%
BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68%
The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.
Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
2016-10-20 03:16:22 -07:00
|
|
|
|
|
|
|
|
|
|
// BenchmarkIndexAnyASCII times IndexAny when no character of the set occurs
// in the input. Sub-benchmarks are named "<input length>:<charset length>".
func BenchmarkIndexAnyASCII(b *testing.B) {
	// Never matches set
	input := Repeat("#", 4096)
	charset := "0123456789abcdef"
	for inLen := 1; inLen <= 4096; inLen <<= 4 {
		for setLen := 1; setLen <= 16; setLen <<= 1 {
			b.Run(fmt.Sprintf("%d:%d", inLen, setLen), func(b *testing.B) {
				for n := b.N; n > 0; n-- {
					IndexAny(input[:inLen], charset[:setLen])
				}
			})
		}
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
// BenchmarkTrimASCII times Trim when every byte of the input belongs to the
// cutset. Sub-benchmarks are named "<input length>:<charset length>".
func BenchmarkTrimASCII(b *testing.B) {
	cs := "0123456789abcdef"
	for k := 1; k <= 4096; k <<= 4 {
		for j := 1; j <= 16; j <<= 1 {
			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
				// Always matches set
				x := Repeat(cs[:j], k)
				// Building the input is setup, not the operation under test.
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					Trim(x[:k], cs[:j])
				}
			})
		}
	}
}
|
2017-11-04 10:19:53 -07:00
|
|
|
|
|
|
|
|
|
|
// BenchmarkIndexPeriodic times Index searching for "aa" in a 64 KiB input
// where a single 'a' appears every skip bytes and the rest is spaces, so the
// key never occurs.
func BenchmarkIndexPeriodic(b *testing.B) {
	key := "aa"
	for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
		b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
			s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
			// Building the input is setup, not the operation under test.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				Index(s, key)
			}
		})
	}
}
|