go/src/strings/strings_test.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package strings_test

import (
	"bytes"
	"fmt"
	"io"
	"math/rand"
	"reflect"
	. "strings"
	"testing"
	"unicode"
	"unicode/utf8"
	"unsafe"
)

func eq(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i := 0; i < len(a); i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

var abcd = "abcd"
var faces = "☺☻☹"
var commas = "1,2,3,4"
var dots = "1....2....3....4"

type IndexTest struct {
	s   string
	sep string
	out int
}

var indexTests = []IndexTest{
	{"", "", 0},
	{"", "a", -1},
	{"", "foo", -1},
	{"fo", "foo", -1},
	{"foo", "foo", 0},
	{"oofofoofooo", "f", 2},
	{"oofofoofooo", "foo", 4},
	{"barfoobarfoo", "foo", 3},
	{"foo", "", 0},
	{"foo", "o", 1},
	{"abcABCabc", "A", 3},
	// cases with one byte strings - test special case in Index()
	{"", "a", -1},
	{"x", "a", -1},
	{"x", "x", 0},
	{"abc", "a", 0},
	{"abc", "b", 1},
	{"abc", "c", 2},
	{"abc", "x", -1},
	// test special cases in Index() for short strings
	{"", "ab", -1},
	{"bc", "ab", -1},
	{"ab", "ab", 0},
	{"xab", "ab", 1},
	{"xab"[:2], "ab", -1},
	{"", "abc", -1},
	{"xbc", "abc", -1},
	{"abc", "abc", 0},
	{"xabc", "abc", 1},
	{"xabc"[:3], "abc", -1},
	{"xabxc", "abc", -1},
	{"", "abcd", -1},
	{"xbcd", "abcd", -1},
	{"abcd", "abcd", 0},
	{"xabcd", "abcd", 1},
	{"xyabcd"[:5], "abcd", -1},
	{"xbcqq", "abcqq", -1},
	{"abcqq", "abcqq", 0},
	{"xabcqq", "abcqq", 1},
	{"xyabcqq"[:6], "abcqq", -1},
	{"xabxcqq", "abcqq", -1},
	{"xabcqxq", "abcqq", -1},
	{"", "01234567", -1},
	{"32145678", "01234567", -1},
	{"01234567", "01234567", 0},
	{"x01234567", "01234567", 1},
	{"x0123456x01234567", "01234567", 9},
	{"xx01234567"[:9], "01234567", -1},
	{"", "0123456789", -1},
	{"3214567844", "0123456789", -1},
	{"0123456789", "0123456789", 0},
	{"x0123456789", "0123456789", 1},
	{"x012345678x0123456789", "0123456789", 11},
	{"xyz0123456789"[:12], "0123456789", -1},
	{"x01234567x89", "0123456789", -1},
	{"", "0123456789012345", -1},
	{"3214567889012345", "0123456789012345", -1},
	{"0123456789012345", "0123456789012345", 0},
	{"x0123456789012345", "0123456789012345", 1},
	{"x012345678901234x0123456789012345", "0123456789012345", 17},
	{"", "01234567890123456789", -1},
	{"32145678890123456789", "01234567890123456789", -1},
	{"01234567890123456789", "01234567890123456789", 0},
	{"x01234567890123456789", "01234567890123456789", 1},
	{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
	{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
	{"", "0123456789012345678901234567890", -1},
	{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
	{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
	{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
	{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
	{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
	{"", "01234567890123456789012345678901", -1},
	{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
	{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
	{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
	{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
	{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
	{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
	{"", "0123456789012345678901234567890123456789", -1},
	{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
	{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
	{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
	{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
	// test fallback to Rabin-Karp.
	{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
	{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
}

var lastIndexTests = []IndexTest{
	{"", "", 0},
	{"", "a", -1},
	{"", "foo", -1},
	{"fo", "foo", -1},
	{"foo", "foo", 0},
	{"foo", "f", 0},
	{"oofofoofooo", "f", 7},
	{"oofofoofooo", "foo", 7},
	{"barfoobarfoo", "foo", 9},
	{"foo", "", 3},
	{"foo", "o", 2},
	{"abcABCabc", "A", 3},
	{"abcABCabc", "a", 6},
}

var indexAnyTests = []IndexTest{
	{"", "", -1},
	{"", "a", -1},
	{"", "abc", -1},
	{"a", "", -1},
	{"a", "a", 0},
	{"aaa", "a", 0},
	{"abc", "xyz", -1},
	{"abc", "xcz", 2},
	{"ab☺c", "x☺yz", 2},
	{"a☺b☻c☹d", "cx", len("a☺b☻")},
	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
	{"aRegExp*", ".(|)*+?^$[]", 7},
	{dots + dots + dots, " ", -1},
	{"012abcba210", "\xffb", 4},
	{"012\x80bcb\x80210", "\xffb", 3},
}

var lastIndexAnyTests = []IndexTest{
	{"", "", -1},
	{"", "a", -1},
	{"", "abc", -1},
	{"a", "", -1},
	{"a", "a", 0},
	{"aaa", "a", 2},
	{"abc", "xyz", -1},
	{"abc", "ab", 1},
	{"ab☺c", "x☺yz", 2},
	{"a☺b☻c☹d", "cx", len("a☺b☻")},
	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
	{"a.RegExp*", ".(|)*+?^$[]", 8},
	{dots + dots + dots, " ", -1},
	{"012abcba210", "\xffb", 6},
	{"012\x80bcb\x80210", "\xffb", 7},
}

// Execute f on each test case.  funcName should be the name of f; it's used
// in failure reports.
func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
	for _, test := range testCases {
		actual := f(test.s, test.sep)
		if actual != test.out {
			t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
		}
	}
}

func TestIndex(t *testing.T)        { runIndexTests(t, Index, "Index", indexTests) }
func TestLastIndex(t *testing.T)    { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
func TestIndexAny(t *testing.T)     { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) }

func TestLastIndexByte(t *testing.T) {
	testCases := []IndexTest{
		{"", "q", -1},
		{"abcdef", "q", -1},
		{"abcdefabcdef", "a", len("abcdef")},      // something in the middle
		{"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
		{"zabcdefabcdef", "z", 0},                 // first byte
		{"a☺b☻c☹d", "b", len("a☺")},               // non-ascii
	}
	for _, test := range testCases {
		actual := LastIndexByte(test.s, test.sep[0])
		if actual != test.out {
			t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.s, test.sep[0], actual, test.out)
		}
	}
}

func simpleIndex(s, sep string) int {
	n := len(sep)
	for i := n; i <= len(s); i++ {
		if s[i-n:i] == sep {
			return i - n
		}
	}
	return -1
}

func TestIndexRandom(t *testing.T) {
	const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
	for times := 0; times < 10; times++ {
		for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary
			s1 := make([]byte, strLen)
			for i := range s1 {
				s1[i] = chars[rand.Intn(len(chars))]
			}
			s := string(s1)
			for i := 0; i < 50; i++ {
				begin := rand.Intn(len(s) + 1)
				end := begin + rand.Intn(len(s)+1-begin)
				sep := s[begin:end]
				if i%4 == 0 {
					pos := rand.Intn(len(sep) + 1)
					sep = sep[:pos] + "A" + sep[pos:]
				}
				want := simpleIndex(s, sep)
				res := Index(s, sep)
				if res != want {
					t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want)
				}
			}
		}
	}
}

func TestIndexRune(t *testing.T) {
	tests := []struct {
		in   string
		rune rune
		want int
	}{
		{"", 'a', -1},
		{"", '☺', -1},
		{"foo", '☹', -1},
		{"foo", 'o', 1},
		{"foo☺bar", '☺', 3},
		{"foo☺☻☹bar", '☹', 9},
		{"a A x", 'A', 2},
		{"some_text=some_value", '=', 9},
		{"☺a", 'a', 3},
		{"a☻☺b", '☺', 4},

		// RuneError should match any invalid UTF-8 byte sequence.
		{"<22>", '<27>', 0},
		{"\xff", '<27>', 0},
		{"☻x<E298BB>", '<27>', len("☻x")},
		{"☻x\xe2\x98", '<27>', len("☻x")},
		{"☻x\xe2\x98<39>", '<27>', len("☻x")},
		{"☻x\xe2\x98x", '<27>', len("☻x")},

		// Invalid rune values should never match.
		{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", -1, -1},
		{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
		{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", utf8.MaxRune + 1, -1},
	}
	for _, tt := range tests {
		if got := IndexRune(tt.in, tt.rune); got != tt.want {
			t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
		}
	}

	haystack := "test世界"
	allocs := testing.AllocsPerRun(1000, func() {
		if i := IndexRune(haystack, 's'); i != 2 {
			t.Fatalf("'s' at %d; want 2", i)
		}
		if i := IndexRune(haystack, '世'); i != 4 {
			t.Fatalf("'世' at %d; want 4", i)
		}
	})
	if allocs != 0 && testing.CoverMode() == "" {
		t.Errorf("expected no allocations, got %f", allocs)
	}
}

const benchmarkString = "some_text=some☺value"

func BenchmarkIndexRune(b *testing.B) {
	if got := IndexRune(benchmarkString, '☺'); got != 14 {
		b.Fatalf("wrong index: expected 14, got=%d", got)
	}
	for i := 0; i < b.N; i++ {
		IndexRune(benchmarkString, '☺')
	}
}

var benchmarkLongString = Repeat(" ", 100) + benchmarkString

func BenchmarkIndexRuneLongString(b *testing.B) {
	if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
		b.Fatalf("wrong index: expected 114, got=%d", got)
	}
	for i := 0; i < b.N; i++ {
		IndexRune(benchmarkLongString, '☺')
	}
}

func BenchmarkIndexRuneFastPath(b *testing.B) {
	if got := IndexRune(benchmarkString, 'v'); got != 17 {
		b.Fatalf("wrong index: expected 17, got=%d", got)
	}
	for i := 0; i < b.N; i++ {
		IndexRune(benchmarkString, 'v')
	}
}

func BenchmarkIndex(b *testing.B) {
	if got := Index(benchmarkString, "v"); got != 17 {
		b.Fatalf("wrong index: expected 17, got=%d", got)
	}
	for i := 0; i < b.N; i++ {
		Index(benchmarkString, "v")
	}
}

func BenchmarkLastIndex(b *testing.B) {
	if got := Index(benchmarkString, "v"); got != 17 {
		b.Fatalf("wrong index: expected 17, got=%d", got)
	}
	for i := 0; i < b.N; i++ {
		LastIndex(benchmarkString, "v")
	}
}

func BenchmarkIndexByte(b *testing.B) {
	if got := IndexByte(benchmarkString, 'v'); got != 17 {
		b.Fatalf("wrong index: expected 17, got=%d", got)
	}
	for i := 0; i < b.N; i++ {
		IndexByte(benchmarkString, 'v')
	}
}

type SplitTest struct {
	s   string
	sep string
	n   int
	a   []string
}

var splittests = []SplitTest{
	{"", "", -1, []string{}},
	{abcd, "", 2, []string{"a", "bcd"}},
	{abcd, "", 4, []string{"a", "b", "c", "d"}},
	{abcd, "", -1, []string{"a", "b", "c", "d"}},
	{faces, "", -1, []string{"☺", "☻", "☹"}},
	{faces, "", 3, []string{"☺", "☻", "☹"}},
	{faces, "", 17, []string{"☺", "☻", "☹"}},
	{"☺<>☹", "", -1, []string{"☺", "<22>", "☹"}},
	{abcd, "a", 0, nil},
	{abcd, "a", -1, []string{"", "bcd"}},
	{abcd, "z", -1, []string{"abcd"}},
	{commas, ",", -1, []string{"1", "2", "3", "4"}},
	{dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
	{faces, "☹", -1, []string{"☺☻", ""}},
	{faces, "~", -1, []string{faces}},
	{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
	{"1 2", " ", 3, []string{"1", "2"}},
}

func TestSplit(t *testing.T) {
	for _, tt := range splittests {
		a := SplitN(tt.s, tt.sep, tt.n)
		if !eq(a, tt.a) {
			t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
			continue
		}
		if tt.n == 0 {
			continue
		}
		s := Join(a, tt.sep)
		if s != tt.s {
			t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
		}
		if tt.n < 0 {
			b := Split(tt.s, tt.sep)
			if !reflect.DeepEqual(a, b) {
				t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
			}
		}
	}
}

var splitaftertests = []SplitTest{
	{abcd, "a", -1, []string{"a", "bcd"}},
	{abcd, "z", -1, []string{"abcd"}},
	{abcd, "", -1, []string{"a", "b", "c", "d"}},
	{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
	{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
	{faces, "☹", -1, []string{"☺☻☹", ""}},
	{faces, "~", -1, []string{faces}},
	{faces, "", -1, []string{"☺", "☻", "☹"}},
	{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
	{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
	{"1 2", " ", 3, []string{"1 ", "2"}},
	{"123", "", 2, []string{"1", "23"}},
	{"123", "", 17, []string{"1", "2", "3"}},
}

func TestSplitAfter(t *testing.T) {
	for _, tt := range splitaftertests {
		a := SplitAfterN(tt.s, tt.sep, tt.n)
		if !eq(a, tt.a) {
			t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
			continue
		}
		s := Join(a, "")
		if s != tt.s {
			t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
		}
		if tt.n < 0 {
			b := SplitAfter(tt.s, tt.sep)
			if !reflect.DeepEqual(a, b) {
				t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
			}
		}
	}
}

type FieldsTest struct {
	s string
	a []string
}

var fieldstests = []FieldsTest{
	{"", []string{}},
	{" ", []string{}},
	{" \t ", []string{}},
	{"\u2000", []string{}},
	{"  abc  ", []string{"abc"}},
	{"1 2 3 4", []string{"1", "2", "3", "4"}},
	{"1  2  3  4", []string{"1", "2", "3", "4"}},
	{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
	{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
	{"\u2000\u2001\u2002", []string{}},
	{"\n™\t™\n", []string{"™", "™"}},
	{"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
	{"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
	{"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
	{faces, []string{faces}},
}

func TestFields(t *testing.T) {
	for _, tt := range fieldstests {
		a := Fields(tt.s)
		if !eq(a, tt.a) {
			t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
			continue
		}
	}
}

var FieldsFuncTests = []FieldsTest{
	{"", []string{}},
	{"XX", []string{}},
	{"XXhiXXX", []string{"hi"}},
	{"aXXbXXXcX", []string{"a", "b", "c"}},
}

func TestFieldsFunc(t *testing.T) {
	for _, tt := range fieldstests {
		a := FieldsFunc(tt.s, unicode.IsSpace)
		if !eq(a, tt.a) {
			t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
			continue
		}
	}
	pred := func(c rune) bool { return c == 'X' }
	for _, tt := range FieldsFuncTests {
		a := FieldsFunc(tt.s, pred)
		if !eq(a, tt.a) {
			t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
		}
	}
}

// Test case for any function which accepts and returns a single string.
type StringTest struct {
	in, out string
}

// Execute f on each test case.  funcName should be the name of f; it's used
// in failure reports.
func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
	for _, tc := range testCases {
		actual := f(tc.in)
		if actual != tc.out {
			t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
		}
	}
}

var upperTests = []StringTest{
	{"", ""},
	{"ONLYUPPER", "ONLYUPPER"},
	{"abc", "ABC"},
	{"AbC123", "ABC123"},
	{"azAZ09_", "AZAZ09_"},
	{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
	{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
	{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
	{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"},                           // test utf8.RuneSelf and utf8.MaxRune
}

var lowerTests = []StringTest{
	{"", ""},
	{"abc", "abc"},
	{"AbC123", "abc123"},
	{"azAZ09_", "azaz09_"},
	{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
	{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
	{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
	{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"},                           // test utf8.RuneSelf and utf8.MaxRune
}

const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"

var trimSpaceTests = []StringTest{
	{"", ""},
	{"abc", "abc"},
	{space + "abc" + space, "abc"},
	{" ", ""},
	{" \t\r\n \t\t\r\r\n\n ", ""},
	{" \t\r\n x\t\t\r\r\n\n ", "x"},
	{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
	{"1 \t\r\n2", "1 \t\r\n2"},
	{" x\x80", "x\x80"},
	{" x\xc0", "x\xc0"},
	{"x \xc0\xc0 ", "x \xc0\xc0"},
	{"x \xc0", "x \xc0"},
	{"x \xc0 ", "x \xc0"},
	{"x \xc0\xc0 ", "x \xc0\xc0"},
	{"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
	{"x ☺ ", "x ☺"},
}

func tenRunes(ch rune) string {
	r := make([]rune, 10)
	for i := range r {
		r[i] = ch
	}
	return string(r)
}

// User-defined self-inverse mapping function
func rot13(r rune) rune {
	step := rune(13)
	if r >= 'a' && r <= 'z' {
		return ((r - 'a' + step) % 26) + 'a'
	}
	if r >= 'A' && r <= 'Z' {
		return ((r - 'A' + step) % 26) + 'A'
	}
	return r
}

func TestMap(t *testing.T) {
	// Run a couple of awful growth/shrinkage tests
	a := tenRunes('a')
	// 1.  Grow. This triggers two reallocations in Map.
	maxRune := func(rune) rune { return unicode.MaxRune }
	m := Map(maxRune, a)
	expect := tenRunes(unicode.MaxRune)
	if m != expect {
		t.Errorf("growing: expected %q got %q", expect, m)
	}

	// 2. Shrink
	minRune := func(rune) rune { return 'a' }
	m = Map(minRune, tenRunes(unicode.MaxRune))
	expect = a
	if m != expect {
		t.Errorf("shrinking: expected %q got %q", expect, m)
	}

	// 3. Rot13
	m = Map(rot13, "a to zed")
	expect = "n gb mrq"
	if m != expect {
		t.Errorf("rot13: expected %q got %q", expect, m)
	}

	// 4. Rot13^2
	m = Map(rot13, Map(rot13, "a to zed"))
	expect = "a to zed"
	if m != expect {
		t.Errorf("rot13: expected %q got %q", expect, m)
	}

	// 5. Drop
	dropNotLatin := func(r rune) rune {
		if unicode.Is(unicode.Latin, r) {
			return r
		}
		return -1
	}
	m = Map(dropNotLatin, "Hello, 세계")
	expect = "Hello"
	if m != expect {
		t.Errorf("drop: expected %q got %q", expect, m)
	}

	// 6. Identity
	identity := func(r rune) rune {
		return r
	}
	orig := "Input string that we expect not to be copied."
	m = Map(identity, orig)
	if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data !=
		(*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
		t.Error("unexpected copy during identity map")
	}

	// 7. Handle invalid UTF-8 sequence
	replaceNotLatin := func(r rune) rune {
		if unicode.Is(unicode.Latin, r) {
			return r
		}
		return '?'
	}
	m = Map(replaceNotLatin, "Hello\255World")
	expect = "Hello?World"
	if m != expect {
		t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
	}

	// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
	encode := func(r rune) rune {
		switch r {
		case utf8.RuneSelf:
			return unicode.MaxRune
		case unicode.MaxRune:
			return utf8.RuneSelf
		}
		return r
	}
	s := string(utf8.RuneSelf) + string(utf8.MaxRune)
	r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
	m = Map(encode, s)
	if m != r {
		t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
	}
	m = Map(encode, r)
	if m != s {
		t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
	}
}

func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }

func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }

func BenchmarkToUpper(b *testing.B) {
	for _, tc := range upperTests {
		b.Run(tc.in, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				actual := ToUpper(tc.in)
				if actual != tc.out {
					b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
				}
			}
		})
	}
}

func BenchmarkToLower(b *testing.B) {
	for _, tc := range lowerTests {
		b.Run(tc.in, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				actual := ToLower(tc.in)
				if actual != tc.out {
					b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
				}
			}
		})
	}
}

func BenchmarkMapNoChanges(b *testing.B) {
	identity := func(r rune) rune {
		return r
	}
	for i := 0; i < b.N; i++ {
		Map(identity, "Some string that won't be modified.")
	}
}

func TestSpecialCase(t *testing.T) {
	lower := "abcçdefgğhıijklmnoöprsştuüvyz"
	upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
	u := ToUpperSpecial(unicode.TurkishCase, upper)
	if u != upper {
		t.Errorf("Upper(upper) is %s not %s", u, upper)
	}
	u = ToUpperSpecial(unicode.TurkishCase, lower)
	if u != upper {
		t.Errorf("Upper(lower) is %s not %s", u, upper)
	}
	l := ToLowerSpecial(unicode.TurkishCase, lower)
	if l != lower {
		t.Errorf("Lower(lower) is %s not %s", l, lower)
	}
	l = ToLowerSpecial(unicode.TurkishCase, upper)
	if l != lower {
		t.Errorf("Lower(upper) is %s not %s", l, lower)
	}
}

func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }

var trimTests = []struct {
	f            string
	in, arg, out string
}{
	{"Trim", "abba", "a", "bb"},
	{"Trim", "abba", "ab", ""},
	{"TrimLeft", "abba", "ab", ""},
	{"TrimRight", "abba", "ab", ""},
	{"TrimLeft", "abba", "a", "bba"},
	{"TrimRight", "abba", "a", "abb"},
	{"Trim", "<tag>", "<>", "tag"},
	{"Trim", "* listitem", " *", "listitem"},
	{"Trim", `"quote"`, `"`, "quote"},
	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
	{"Trim", "\x80test\xff", "\xff", "test"},
	{"Trim", " Ġ ", " ", "Ġ"},
	{"Trim", " Ġİ0", "0 ", "Ġİ"},
	//empty string tests
	{"Trim", "abba", "", "abba"},
	{"Trim", "", "123", ""},
	{"Trim", "", "", ""},
	{"TrimLeft", "abba", "", "abba"},
	{"TrimLeft", "", "123", ""},
	{"TrimLeft", "", "", ""},
	{"TrimRight", "abba", "", "abba"},
	{"TrimRight", "", "123", ""},
	{"TrimRight", "", "", ""},
	{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
	{"TrimPrefix", "aabb", "a", "abb"},
	{"TrimPrefix", "aabb", "b", "aabb"},
	{"TrimSuffix", "aabb", "a", "aabb"},
	{"TrimSuffix", "aabb", "b", "aab"},
}

func TestTrim(t *testing.T) {
	for _, tc := range trimTests {
		name := tc.f
		var f func(string, string) string
		switch name {
		case "Trim":
			f = Trim
		case "TrimLeft":
			f = TrimLeft
		case "TrimRight":
			f = TrimRight
		case "TrimPrefix":
			f = TrimPrefix
		case "TrimSuffix":
			f = TrimSuffix
		default:
			t.Errorf("Undefined trim function %s", name)
		}
		actual := f(tc.in, tc.arg)
		if actual != tc.out {
			t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
		}
	}
}

func BenchmarkTrim(b *testing.B) {
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		for _, tc := range trimTests {
			name := tc.f
			var f func(string, string) string
			switch name {
			case "Trim":
				f = Trim
			case "TrimLeft":
				f = TrimLeft
			case "TrimRight":
				f = TrimRight
			case "TrimPrefix":
				f = TrimPrefix
			case "TrimSuffix":
				f = TrimSuffix
			default:
				b.Errorf("Undefined trim function %s", name)
			}
			actual := f(tc.in, tc.arg)
			if actual != tc.out {
				b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
			}
		}
	}
}

type predicate struct {
	f    func(rune) bool
	name string
}

var isSpace = predicate{unicode.IsSpace, "IsSpace"}
var isDigit = predicate{unicode.IsDigit, "IsDigit"}
var isUpper = predicate{unicode.IsUpper, "IsUpper"}
var isValidRune = predicate{
	func(r rune) bool {
		return r != utf8.RuneError
	},
	"IsValidRune",
}

func not(p predicate) predicate {
	return predicate{
		func(r rune) bool {
			return !p.f(r)
		},
		"not " + p.name,
	}
}

var trimFuncTests = []struct {
	f       predicate
	in, out string
}{
	{isSpace, space + " hello " + space, "hello"},
	{isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
	{isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
	{not(isSpace), "hello" + space + "hello", space},
	{not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
	{isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
	{not(isValidRune), "\xc0a\xc0", "a"},
}

func TestTrimFunc(t *testing.T) {
	for _, tc := range trimFuncTests {
		actual := TrimFunc(tc.in, tc.f.f)
		if actual != tc.out {
			t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
		}
	}
}

var indexFuncTests = []struct {
	in          string
	f           predicate
	first, last int
}{
	{"", isValidRune, -1, -1},
	{"abc", isDigit, -1, -1},
	{"0123", isDigit, 0, 3},
	{"a1b", isDigit, 1, 1},
	{space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
	{"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
	{"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
	{"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},

	// tests of invalid UTF-8
	{"\x801", isDigit, 1, 1},
	{"\x80abc", isDigit, -1, -1},
	{"\xc0a\xc0", isValidRune, 1, 1},
	{"\xc0a\xc0", not(isValidRune), 0, 2},
	{"\xc0☺\xc0", not(isValidRune), 0, 4},
	{"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
	{"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
	{"a\xe0\x80cd", not(isValidRune), 1, 2},
	{"\x80\x80\x80\x80", not(isValidRune), 0, 3},
}

func TestIndexFunc(t *testing.T) {
	for _, tc := range indexFuncTests {
		first := IndexFunc(tc.in, tc.f.f)
		if first != tc.first {
			t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
		}
		last := LastIndexFunc(tc.in, tc.f.f)
		if last != tc.last {
			t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
		}
	}
}

func equal(m string, s1, s2 string, t *testing.T) bool {
	if s1 == s2 {
		return true
	}
	e1 := Split(s1, "")
	e2 := Split(s2, "")
	for i, c1 := range e1 {
		if i >= len(e2) {
			break
		}
		r1, _ := utf8.DecodeRuneInString(c1)
		r2, _ := utf8.DecodeRuneInString(e2[i])
		if r1 != r2 {
			t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
		}
	}
	return false
}

func TestCaseConsistency(t *testing.T) {
	// Make a string of all the runes.
	numRunes := int(unicode.MaxRune + 1)
	if testing.Short() {
		numRunes = 1000
	}
	a := make([]rune, numRunes)
	for i := range a {
		a[i] = rune(i)
	}
	s := string(a)
	// convert the cases.
	upper := ToUpper(s)
	lower := ToLower(s)

	// Consistency checks
	if n := utf8.RuneCountInString(upper); n != numRunes {
		t.Error("rune count wrong in upper:", n)
	}
	if n := utf8.RuneCountInString(lower); n != numRunes {
		t.Error("rune count wrong in lower:", n)
	}
	if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
		t.Error("ToUpper(upper) consistency fail")
	}
	if !equal("ToLower(lower)", ToLower(lower), lower, t) {
		t.Error("ToLower(lower) consistency fail")
	}
	/*
		  These fail because of non-one-to-oneness of the data, such as multiple
		  upper case 'I' mapping to 'i'.  We comment them out but keep them for
		  interest.
		  For instance: CAPITAL LETTER I WITH DOT ABOVE:
			unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'

		if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
			t.Error("ToUpper(lower) consistency fail");
		}
		if !equal("ToLower(upper)", ToLower(upper), lower, t) {
			t.Error("ToLower(upper) consistency fail");
		}
	*/
}

var RepeatTests = []struct {
	in, out string
	count   int
}{
	{"", "", 0},
	{"", "", 1},
	{"", "", 2},
	{"-", "", 0},
	{"-", "-", 1},
	{"-", "----------", 10},
	{"abc ", "abc abc abc ", 3},
}

func TestRepeat(t *testing.T) {
	for _, tt := range RepeatTests {
		a := Repeat(tt.in, tt.count)
		if !equal("Repeat(s)", a, tt.out, t) {
			t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
			continue
		}
	}
}

func repeat(s string, count int) (err error) {
	defer func() {
		if r := recover(); r != nil {
			switch v := r.(type) {
			case error:
				err = v
			default:
				err = fmt.Errorf("%s", v)
			}
		}
	}()

	Repeat(s, count)

	return
}

// See Issue golang.org/issue/16237
func TestRepeatCatchesOverflow(t *testing.T) {
	tests := [...]struct {
		s      string
		count  int
		errStr string
	}{
		0: {"--", -2147483647, "negative"},
		1: {"", int(^uint(0) >> 1), ""},
		2: {"-", 10, ""},
		3: {"gopher", 0, ""},
		4: {"-", -1, "negative"},
		5: {"--", -102, "negative"},
		6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
	}

	for i, tt := range tests {
		err := repeat(tt.s, tt.count)
		if tt.errStr == "" {
			if err != nil {
				t.Errorf("#%d panicked %v", i, err)
			}
			continue
		}

		if err == nil || !Contains(err.Error(), tt.errStr) {
			t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
		}
	}
}

func runesEqual(a, b []rune) bool {
	if len(a) != len(b) {
		return false
	}
	for i, r := range a {
		if r != b[i] {
			return false
		}
	}
	return true
}

var RunesTests = []struct {
	in    string
	out   []rune
	lossy bool
}{
	{"", []rune{}, false},
	{" ", []rune{32}, false},
	{"ABC", []rune{65, 66, 67}, false},
	{"abc", []rune{97, 98, 99}, false},
	{"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
	{"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
	{"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
}

func TestRunes(t *testing.T) {
	for _, tt := range RunesTests {
		a := []rune(tt.in)
		if !runesEqual(a, tt.out) {
			t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out)
			continue
		}
		if !tt.lossy {
			// can only test reassembly if we didn't lose information
			s := string(a)
			if s != tt.in {
				t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in)
			}
		}
	}
}

func TestReadByte(t *testing.T) {
	testStrings := []string{"", abcd, faces, commas}
	for _, s := range testStrings {
		reader := NewReader(s)
		if e := reader.UnreadByte(); e == nil {
			t.Errorf("Unreading %q at beginning: expected error", s)
		}
		var res bytes.Buffer
		for {
			b, e := reader.ReadByte()
			if e == io.EOF {
				break
			}
			if e != nil {
				t.Errorf("Reading %q: %s", s, e)
				break
			}
			res.WriteByte(b)
			// unread and read again
			e = reader.UnreadByte()
			if e != nil {
				t.Errorf("Unreading %q: %s", s, e)
				break
			}
			b1, e := reader.ReadByte()
			if e != nil {
				t.Errorf("Reading %q after unreading: %s", s, e)
				break
			}
			if b1 != b {
				t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1)
				break
			}
		}
		if res.String() != s {
			t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String())
		}
	}
}

func TestReadRune(t *testing.T) {
	testStrings := []string{"", abcd, faces, commas}
	for _, s := range testStrings {
		reader := NewReader(s)
		if e := reader.UnreadRune(); e == nil {
			t.Errorf("Unreading %q at beginning: expected error", s)
		}
		res := ""
		for {
			r, z, e := reader.ReadRune()
			if e == io.EOF {
				break
			}
			if e != nil {
				t.Errorf("Reading %q: %s", s, e)
				break
			}
			res += string(r)
			// unread and read again
			e = reader.UnreadRune()
			if e != nil {
				t.Errorf("Unreading %q: %s", s, e)
				break
			}
			r1, z1, e := reader.ReadRune()
			if e != nil {
				t.Errorf("Reading %q after unreading: %s", s, e)
				break
			}
			if r1 != r {
				t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1)
				break
			}
			if z1 != z {
				t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1)
				break
			}
		}
		if res != s {
			t.Errorf("Reader(%q).ReadRune() produced %q", s, res)
		}
	}
}

var UnreadRuneErrorTests = []struct {
	name string
	f    func(*Reader)
}{
	{"Read", func(r *Reader) { r.Read([]byte{0}) }},
	{"ReadByte", func(r *Reader) { r.ReadByte() }},
	{"UnreadRune", func(r *Reader) { r.UnreadRune() }},
	{"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
	{"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
}

func TestUnreadRuneError(t *testing.T) {
	for _, tt := range UnreadRuneErrorTests {
		reader := NewReader("0123456789")
		if _, _, err := reader.ReadRune(); err != nil {
			// should not happen
			t.Fatal(err)
		}
		tt.f(reader)
		err := reader.UnreadRune()
		if err == nil {
			t.Errorf("Unreading after %s: expected error", tt.name)
		}
	}
}

var ReplaceTests = []struct {
	in       string
	old, new string
	n        int
	out      string
}{
	{"hello", "l", "L", 0, "hello"},
	{"hello", "l", "L", -1, "heLLo"},
	{"hello", "x", "X", -1, "hello"},
	{"", "x", "X", -1, ""},
	{"radar", "r", "<r>", -1, "<r>ada<r>"},
	{"", "", "<>", -1, "<>"},
	{"banana", "a", "<>", -1, "b<>n<>n<>"},
	{"banana", "a", "<>", 1, "b<>nana"},
	{"banana", "a", "<>", 1000, "b<>n<>n<>"},
	{"banana", "an", "<>", -1, "b<><>a"},
	{"banana", "ana", "<>", -1, "b<>na"},
	{"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
	{"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
	{"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
	{"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
	{"banana", "", "<>", 1, "<>banana"},
	{"banana", "a", "a", -1, "banana"},
	{"banana", "a", "a", 1, "banana"},
	{"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
}

func TestReplace(t *testing.T) {
	for _, tt := range ReplaceTests {
		if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out {
			t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
		}
	}
}

var TitleTests = []struct {
	in, out string
}{
	{"", ""},
	{"a", "A"},
	{" aaa aaa aaa ", " Aaa Aaa Aaa "},
	{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
	{"123a456", "123a456"},
	{"double-blind", "Double-Blind"},
	{"ÿøû", "Ÿøû"},
	{"with_underscore", "With_underscore"},
	{"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
}

func TestTitle(t *testing.T) {
	for _, tt := range TitleTests {
		if s := Title(tt.in); s != tt.out {
			t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
		}
	}
}

var ContainsTests = []struct {
	str, substr string
	expected    bool
}{
	{"abc", "bc", true},
	{"abc", "bcd", false},
	{"abc", "", true},
	{"", "a", false},

	// cases to cover code in runtime/asm_amd64.s:indexShortStr
	// 2-byte needle
	{"xxxxxx", "01", false},
	{"01xxxx", "01", true},
	{"xx01xx", "01", true},
	{"xxxx01", "01", true},
	{"01xxxxx"[1:], "01", false},
	{"xxxxx01"[:6], "01", false},
	// 3-byte needle
	{"xxxxxxx", "012", false},
	{"012xxxx", "012", true},
	{"xx012xx", "012", true},
	{"xxxx012", "012", true},
	{"012xxxxx"[1:], "012", false},
	{"xxxxx012"[:7], "012", false},
	// 4-byte needle
	{"xxxxxxxx", "0123", false},
	{"0123xxxx", "0123", true},
	{"xx0123xx", "0123", true},
	{"xxxx0123", "0123", true},
	{"0123xxxxx"[1:], "0123", false},
	{"xxxxx0123"[:8], "0123", false},
	// 5-7-byte needle
	{"xxxxxxxxx", "01234", false},
	{"01234xxxx", "01234", true},
	{"xx01234xx", "01234", true},
	{"xxxx01234", "01234", true},
	{"01234xxxxx"[1:], "01234", false},
	{"xxxxx01234"[:9], "01234", false},
	// 8-byte needle
	{"xxxxxxxxxxxx", "01234567", false},
	{"01234567xxxx", "01234567", true},
	{"xx01234567xx", "01234567", true},
	{"xxxx01234567", "01234567", true},
	{"01234567xxxxx"[1:], "01234567", false},
	{"xxxxx01234567"[:12], "01234567", false},
	// 9-15-byte needle
	{"xxxxxxxxxxxxx", "012345678", false},
	{"012345678xxxx", "012345678", true},
	{"xx012345678xx", "012345678", true},
	{"xxxx012345678", "012345678", true},
	{"012345678xxxxx"[1:], "012345678", false},
	{"xxxxx012345678"[:13], "012345678", false},
	// 16-byte needle
	{"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
	{"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
	{"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
	{"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
	{"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
	{"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
	// 17-31-byte needle
	{"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
	{"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
	{"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
	{"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
	{"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
	{"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},

	// partial match cases
	{"xx01x", "012", false},                             // 3
	{"xx0123x", "01234", false},                         // 5-7
	{"xx01234567x", "012345678", false},                 // 9-15
	{"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
}

func TestContains(t *testing.T) {
	for _, ct := range ContainsTests {
		if Contains(ct.str, ct.substr) != ct.expected {
			t.Errorf("Contains(%s, %s) = %v, want %v",
				ct.str, ct.substr, !ct.expected, ct.expected)
		}
	}
}

var ContainsAnyTests = []struct {
	str, substr string
	expected    bool
}{
	{"", "", false},
	{"", "a", false},
	{"", "abc", false},
	{"a", "", false},
	{"a", "a", true},
	{"aaa", "a", true},
	{"abc", "xyz", false},
	{"abc", "xcz", true},
	{"a☺b☻c☹d", "uvw☻xyz", true},
	{"aRegExp*", ".(|)*+?^$[]", true},
	{dots + dots + dots, " ", false},
}

func TestContainsAny(t *testing.T) {
	for _, ct := range ContainsAnyTests {
		if ContainsAny(ct.str, ct.substr) != ct.expected {
			t.Errorf("ContainsAny(%s, %s) = %v, want %v",
				ct.str, ct.substr, !ct.expected, ct.expected)
		}
	}
}

var ContainsRuneTests = []struct {
	str      string
	r        rune
	expected bool
}{
	{"", 'a', false},
	{"a", 'a', true},
	{"aaa", 'a', true},
	{"abc", 'y', false},
	{"abc", 'c', true},
	{"a☺b☻c☹d", 'x', false},
	{"a☺b☻c☹d", '☻', true},
	{"aRegExp*", '*', true},
}

func TestContainsRune(t *testing.T) {
	for _, ct := range ContainsRuneTests {
		if ContainsRune(ct.str, ct.r) != ct.expected {
			t.Errorf("ContainsRune(%q, %q) = %v, want %v",
				ct.str, ct.r, !ct.expected, ct.expected)
		}
	}
}

var EqualFoldTests = []struct {
	s, t string
	out  bool
}{
	{"abc", "abc", true},
	{"ABcd", "ABcd", true},
	{"123abc", "123ABC", true},
	{"αβδ", "ΑΒΔ", true},
	{"abc", "xyz", false},
	{"abc", "XYZ", false},
	{"abcdefghijk", "abcdefghijX", false},
	{"abcdefghijk", "abcdefghij\u212A", true},
	{"abcdefghijK", "abcdefghij\u212A", true},
	{"abcdefghijkz", "abcdefghij\u212Ay", false},
	{"abcdefghijKz", "abcdefghij\u212Ay", false},
	{"1", "2", false},
	{"utf-8", "US-ASCII", false},
}

func TestEqualFold(t *testing.T) {
	for _, tt := range EqualFoldTests {
		if out := EqualFold(tt.s, tt.t); out != tt.out {
			t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
		}
		if out := EqualFold(tt.t, tt.s); out != tt.out {
			t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
		}
	}
}

func BenchmarkEqualFold(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, tt := range EqualFoldTests {
			if out := EqualFold(tt.s, tt.t); out != tt.out {
				b.Fatal("wrong result")
			}
		}
	}
}

var CountTests = []struct {
	s, sep string
	num    int
}{
	{"", "", 1},
	{"", "notempty", 0},
	{"notempty", "", 9},
	{"smaller", "not smaller", 0},
	{"12345678987654321", "6", 2},
	{"611161116", "6", 3},
	{"notequal", "NotEqual", 0},
	{"equal", "equal", 1},
	{"abc1231231123q", "123", 3},
	{"11111", "11", 2},
}

func TestCount(t *testing.T) {
	for _, tt := range CountTests {
		if num := Count(tt.s, tt.sep); num != tt.num {
			t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, num, tt.num)
		}
	}
}

func makeBenchInputHard() string {
	tokens := [...]string{
		"<a>", "<p>", "<b>", "<strong>",
		"</a>", "</p>", "</b>", "</strong>",
		"hello", "world",
	}
	x := make([]byte, 0, 1<<20)
	for {
		i := rand.Intn(len(tokens))
		if len(x)+len(tokens[i]) >= 1<<20 {
			break
		}
		x = append(x, tokens[i]...)
	}
	return string(x)
}

var benchInputHard = makeBenchInputHard()

func benchmarkIndexHard(b *testing.B, sep string) {
	for i := 0; i < b.N; i++ {
		Index(benchInputHard, sep)
	}
}

func benchmarkLastIndexHard(b *testing.B, sep string) {
	for i := 0; i < b.N; i++ {
		LastIndex(benchInputHard, sep)
	}
}

func benchmarkCountHard(b *testing.B, sep string) {
	for i := 0; i < b.N; i++ {
		Count(benchInputHard, sep)
	}
}

func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
func BenchmarkIndexHard4(b *testing.B) {
	benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
}

func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }

func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }

var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
var benchNeedleTorture = Repeat("ABC", 1<<10+1)

func BenchmarkIndexTorture(b *testing.B) {
	for i := 0; i < b.N; i++ {
		Index(benchInputTorture, benchNeedleTorture)
	}
}

func BenchmarkCountTorture(b *testing.B) {
	for i := 0; i < b.N; i++ {
		Count(benchInputTorture, benchNeedleTorture)
	}
}

func BenchmarkCountTortureOverlapping(b *testing.B) {
	A := Repeat("ABC", 1<<20)
	B := Repeat("ABC", 1<<10)
	for i := 0; i < b.N; i++ {
		Count(A, B)
	}
}

func BenchmarkCountByte(b *testing.B) {
	indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
	benchStr := Repeat(benchmarkString,
		(indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
	benchFunc := func(b *testing.B, benchStr string) {
		b.SetBytes(int64(len(benchStr)))
		for i := 0; i < b.N; i++ {
			Count(benchStr, "=")
		}
	}
	for _, size := range indexSizes {
		b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
			benchFunc(b, benchStr[:size])
		})
	}

}

var makeFieldsInput = func() string {
	x := make([]byte, 1<<20)
	// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
	for i := range x {
		switch rand.Intn(10) {
		case 0:
			x[i] = ' '
		case 1:
			if i > 0 && x[i-1] == 'x' {
				copy(x[i-1:], "χ")
				break
			}
			fallthrough
		default:
			x[i] = 'x'
		}
	}
	return string(x)
}

var makeFieldsInputASCII = func() string {
	x := make([]byte, 1<<20)
	// Input is ~10% space, rest ASCII non-space.
	for i := range x {
		if rand.Intn(10) == 0 {
			x[i] = ' '
		} else {
			x[i] = 'x'
		}
	}
	return string(x)
}

var stringdata = []struct{ name, data string }{
	{"ASCII", makeFieldsInputASCII()},
	{"Mixed", makeFieldsInput()},
}

func BenchmarkFields(b *testing.B) {
	for _, sd := range stringdata {
		b.Run(sd.name, func(b *testing.B) {
			for j := 1 << 4; j <= 1<<20; j <<= 4 {
				b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
					b.ReportAllocs()
					b.SetBytes(int64(j))
					data := sd.data[:j]
					for i := 0; i < b.N; i++ {
						Fields(data)
					}
				})
			}
		})
	}
}

func BenchmarkFieldsFunc(b *testing.B) {
	for _, sd := range stringdata {
		b.Run(sd.name, func(b *testing.B) {
			for j := 1 << 4; j <= 1<<20; j <<= 4 {
				b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
					b.ReportAllocs()
					b.SetBytes(int64(j))
					data := sd.data[:j]
					for i := 0; i < b.N; i++ {
						FieldsFunc(data, unicode.IsSpace)
					}
				})
			}
		})
	}
}

func BenchmarkSplitEmptySeparator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		Split(benchInputHard, "")
	}
}

func BenchmarkSplitSingleByteSeparator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		Split(benchInputHard, "/")
	}
}

func BenchmarkSplitMultiByteSeparator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		Split(benchInputHard, "hello")
	}
}

func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		SplitN(benchInputHard, "/", 10)
	}
}

func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
	for i := 0; i < b.N; i++ {
		SplitN(benchInputHard, "hello", 10)
	}
}

func BenchmarkRepeat(b *testing.B) {
	for i := 0; i < b.N; i++ {
		Repeat("-", 80)
	}
}

func BenchmarkIndexAnyASCII(b *testing.B) {
	x := Repeat("#", 4096) // Never matches set
	cs := "0123456789abcdef"
	for k := 1; k <= 4096; k <<= 4 {
		for j := 1; j <= 16; j <<= 1 {
			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
				for i := 0; i < b.N; i++ {
					IndexAny(x[:k], cs[:j])
				}
			})
		}
	}
}

func BenchmarkTrimASCII(b *testing.B) {
	cs := "0123456789abcdef"
	for k := 1; k <= 4096; k <<= 4 {
		for j := 1; j <= 16; j <<= 1 {
			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
				x := Repeat(cs[:j], k) // Always matches set
				for i := 0; i < b.N; i++ {
					Trim(x[:k], cs[:j])
				}
			})
		}
	}
}

func BenchmarkIndexPeriodic(b *testing.B) {
	key := "aa"
	for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
		b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
			s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
			for i := 0; i < b.N; i++ {
				Index(s, key)
			}
		})
	}
}
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+								// Copyright 2009 The Go Authors. All rights reserved.
 								// Use of this source code is governed by a BSD-style
 								// license that can be found in the LICENSE file.
-												rearrange some constants.  unicode package now defines MaxRune and ReplacementChar.
utf8 package imports unicode to get those definitions.
regenerate dependencies.

R=rsc
DELTA=41  (19 added, 3 deleted, 19 changed)
OCL=34123
CL=34129

											
										
										
											2009-08-31 13:01:25 -07:00
+								package strings_test
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
 								import (
-												strings: implement UnreadByte, UnreadRune

Added corresponding tests.

R=rsc
CC=golang-dev
https://golang.org/cl/4560045

											
										
										
											2011-05-26 11:02:07 -07:00
+									"bytes"
-												strings, bytes: panic if Repeat overflows or if given a negative count

Panic if Repeat is given a negative count or
if the value of (len(*) * count) is detected
to overflow.
We panic because we cannot change the
signature of Repeat to return an error.

Fixes #16237

Change-Id: I9f5ba031a5b8533db0582d7a672ffb715143f3fb
Reviewed-on: https://go-review.googlesource.com/29954
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2016-09-28 01:54:38 -07:00
+									"fmt"
-												src/pkg/[n-z]*: gofix -r error -force=error

R=golang-dev, bsiegert, iant
CC=golang-dev
https://golang.org/cl/5294074

											
										
										
											2011-11-01 22:05:34 -04:00
+									"io"
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
+									"math/rand"
-												strings: Map: avoid allocation when string is unchanged

This speeds up strings.ToLower, etc.

before/after:
strings_test.BenchmarkMapNoChanges 1000000 1013 ns/op
strings_test.BenchmarkMapNoChanges 5000000  442 ns/op

R=r, rog, eh, rsc
CC=golang-dev
https://golang.org/cl/4306056

											
										
										
											2011-03-28 09:41:57 -07:00
+									"reflect"
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									. "strings"
 									"testing"
 									"unicode"
-												renaming_3: gofix -r go1pkgrename src/pkg/[m-z]*

R=rsc
CC=golang-dev
https://golang.org/cl/5345045

											
										
										
											2011-11-08 15:41:54 -08:00
+									"unicode/utf8"
-												strings: Map: avoid allocation when string is unchanged

This speeds up strings.ToLower, etc.

before/after:
strings_test.BenchmarkMapNoChanges 1000000 1013 ns/op
strings_test.BenchmarkMapNoChanges 5000000  442 ns/op

R=r, rog, eh, rsc
CC=golang-dev
https://golang.org/cl/4306056

											
										
										
											2011-03-28 09:41:57 -07:00
+									"unsafe"
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+								)
-												convert *[] to [].

R=r
OCL=21563
CL=21571

											
										
										
											2008-12-18 22:37:22 -08:00
+								func eq(a, b []string) bool {
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+									if len(a) != len(b) {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										return false
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+									}
 									for i := 0; i < len(a); i++ {
 										if a[i] != b[i] {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											return false
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+										}
 									}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									return true
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
+								var abcd = "abcd"
 								var faces = "☺☻☹"
 								var commas = "1,2,3,4"
 								var dots = "1....2....3....4"
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
+								type IndexTest struct {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									s   string
 									sep string
 									out int
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
+								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
+								var indexTests = []IndexTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", "", 0},
 									{"", "a", -1},
 									{"", "foo", -1},
 									{"fo", "foo", -1},
 									{"foo", "foo", 0},
 									{"oofofoofooo", "f", 2},
 									{"oofofoofooo", "foo", 4},
 									{"barfoobarfoo", "foo", 3},
 									{"foo", "", 0},
 									{"foo", "o", 1},
 									{"abcABCabc", "A", 3},
-												add bytes.IndexByte; common case we can make fast later.
also pick off the special case in strings.Index.   don't want strings.IndexByte
because the call site will very rarely need to allocate and we can handle the
test in the code itself.   bytes.IndexByte can avoid a common allocation.

R=rsc
CC=golang-dev
https://golang.org/cl/156091

											
										
										
											2009-11-18 19:23:08 -08:00
+									// cases with one byte strings - test special case in Index()
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", "a", -1},
 									{"x", "a", -1},
 									{"x", "x", 0},
 									{"abc", "a", 0},
 									{"abc", "b", 1},
 									{"abc", "c", 2},
 									{"abc", "x", -1},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									// test special cases in Index() for short strings
 									{"", "ab", -1},
 									{"bc", "ab", -1},
 									{"ab", "ab", 0},
 									{"xab", "ab", 1},
 									{"xab"[:2], "ab", -1},
 									{"", "abc", -1},
 									{"xbc", "abc", -1},
 									{"abc", "abc", 0},
 									{"xabc", "abc", 1},
 									{"xabc"[:3], "abc", -1},
 									{"xabxc", "abc", -1},
 									{"", "abcd", -1},
 									{"xbcd", "abcd", -1},
 									{"abcd", "abcd", 0},
 									{"xabcd", "abcd", 1},
 									{"xyabcd"[:5], "abcd", -1},
 									{"xbcqq", "abcqq", -1},
 									{"abcqq", "abcqq", 0},
 									{"xabcqq", "abcqq", 1},
 									{"xyabcqq"[:6], "abcqq", -1},
 									{"xabxcqq", "abcqq", -1},
 									{"xabcqxq", "abcqq", -1},
 									{"", "01234567", -1},
 									{"32145678", "01234567", -1},
 									{"01234567", "01234567", 0},
 									{"x01234567", "01234567", 1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"x0123456x01234567", "01234567", 9},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									{"xx01234567"[:9], "01234567", -1},
 									{"", "0123456789", -1},
 									{"3214567844", "0123456789", -1},
 									{"0123456789", "0123456789", 0},
 									{"x0123456789", "0123456789", 1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"x012345678x0123456789", "0123456789", 11},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									{"xyz0123456789"[:12], "0123456789", -1},
 									{"x01234567x89", "0123456789", -1},
 									{"", "0123456789012345", -1},
 									{"3214567889012345", "0123456789012345", -1},
 									{"0123456789012345", "0123456789012345", 0},
 									{"x0123456789012345", "0123456789012345", 1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"x012345678901234x0123456789012345", "0123456789012345", 17},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									{"", "01234567890123456789", -1},
 									{"32145678890123456789", "01234567890123456789", -1},
 									{"01234567890123456789", "01234567890123456789", 0},
 									{"x01234567890123456789", "01234567890123456789", 1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
 									{"", "0123456789012345678901234567890", -1},
 									{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
 									{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
 									{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
 									{"", "01234567890123456789012345678901", -1},
 									{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
 									{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
 									{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
-												strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>

											
										
										
											2015-10-28 18:05:05 +03:00
+									{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+									{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
 									{"", "0123456789012345678901234567890123456789", -1},
 									{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
 									{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
 									{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
 									{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
-												bytes,strings: in generic Index, use mix of IndexByte and Rabin-Karp

Use IndexByte first, as it allows us to skip lots of bytes quickly.
If IndexByte is generating a lot of false positives, switch over to Rabin-Karp.

Experiments for ppc64le
bytes:
name                             old time/op  new time/op  delta
IndexPeriodic/IndexPeriodic2-2   1.12ms ± 0%  0.18ms ± 0%  -83.54%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic4-2    635µs ± 0%   184µs ± 0%  -71.06%  (p=0.000 n=9+9)
IndexPeriodic/IndexPeriodic8-2    289µs ± 0%   184µs ± 0%  -36.51%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic16-2   133µs ± 0%   183µs ± 0%  +37.68%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic32-2  68.3µs ± 0%  70.2µs ± 0%   +2.76%  (p=0.000 n=10+10)
IndexPeriodic/IndexPeriodic64-2  35.8µs ± 0%  36.6µs ± 0%   +2.17%  (p=0.000 n=8+10)

strings:
name                             old time/op  new time/op  delta
IndexPeriodic/IndexPeriodic2-2    184µs ± 0%   184µs ± 0%   +0.11%  (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic4-2    184µs ± 0%   184µs ± 0%     ~     (p=0.886 n=4+4)
IndexPeriodic/IndexPeriodic8-2    184µs ± 0%   184µs ± 0%     ~     (p=0.486 n=4+4)
IndexPeriodic/IndexPeriodic16-2   185µs ± 1%   184µs ± 0%     ~     (p=0.343 n=4+4)
IndexPeriodic/IndexPeriodic32-2   184µs ± 0%    69µs ± 0%  -62.37%  (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic64-2   184µs ± 0%    37µs ± 0%  -80.17%  (p=0.029 n=4+4)

Fixes #22578

Change-Id: If2a4d8554cb96bfd699b58149d13ac294615f8b8
Reviewed-on: https://go-review.googlesource.com/76070
Reviewed-by: Alberto Donizetti <alb.donizetti@gmail.com>

											
										
										
											2017-11-04 10:19:53 -07:00
+									// test fallback to Rabin-Karp.
 									{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
 									{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
+								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
+								var lastIndexTests = []IndexTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", "", 0},
 									{"", "a", -1},
 									{"", "foo", -1},
 									{"fo", "foo", -1},
 									{"foo", "foo", 0},
 									{"foo", "f", 0},
 									{"oofofoofooo", "f", 7},
 									{"oofofoofooo", "foo", 7},
 									{"barfoobarfoo", "foo", 9},
 									{"foo", "", 3},
 									{"foo", "o", 2},
 									{"abcABCabc", "A", 3},
 									{"abcABCabc", "a", 6},
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
+								}
-												bytes, strings: IndexOfAny
+ first use in go/doc

R=r
CC=golang-dev
https://golang.org/cl/781041

											
										
										
											2010-03-26 13:05:04 -07:00
+								var indexAnyTests = []IndexTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", "", -1},
 									{"", "a", -1},
 									{"", "abc", -1},
 									{"a", "", -1},
 									{"a", "a", 0},
 									{"aaa", "a", 0},
 									{"abc", "xyz", -1},
 									{"abc", "xcz", 2},
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
+									{"ab☺c", "x☺yz", 2},
 									{"a☺b☻c☹d", "cx", len("a☺b☻")},
 									{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"aRegExp*", ".(|)*+?^$[]", 7},
 									{dots + dots + dots, " ", -1},
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
+									{"012abcba210", "\xffb", 4},
 									{"012\x80bcb\x80210", "\xffb", 3},
-												bytes, strings: IndexOfAny
+ first use in go/doc

R=r
CC=golang-dev
https://golang.org/cl/781041

											
										
										
											2010-03-26 13:05:04 -07:00
+								}
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
-												strings: add LastIndexAny

The need for a LastIndexAny function has come up in the discussion
for https://golang.org/cl/3008041/. This function is
implemented analogously to lastIndexFunc, using functions from
the utf8 package.

R=r, rsc, PeterGo
CC=golang-dev
https://golang.org/cl/3057041

											
										
										
											2010-11-12 12:47:50 -08:00
+								var lastIndexAnyTests = []IndexTest{
 									{"", "", -1},
 									{"", "a", -1},
 									{"", "abc", -1},
 									{"a", "", -1},
 									{"a", "a", 0},
 									{"aaa", "a", 2},
 									{"abc", "xyz", -1},
 									{"abc", "ab", 1},
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
+									{"ab☺c", "x☺yz", 2},
 									{"a☺b☻c☹d", "cx", len("a☺b☻")},
 									{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
-												strings: add LastIndexAny

The need for a LastIndexAny function has come up in the discussion
for https://golang.org/cl/3008041/. This function is
implemented analogously to lastIndexFunc, using functions from
the utf8 package.

R=r, rsc, PeterGo
CC=golang-dev
https://golang.org/cl/3057041

											
										
										
											2010-11-12 12:47:50 -08:00
+									{"a.RegExp*", ".(|)*+?^$[]", 8},
 									{dots + dots + dots, " ", -1},
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
+									{"012abcba210", "\xffb", 6},
 									{"012\x80bcb\x80210", "\xffb", 7},
-												strings: add LastIndexAny

The need for a LastIndexAny function has come up in the discussion
for https://golang.org/cl/3008041/. This function is
implemented analogously to lastIndexFunc, using functions from
the utf8 package.

R=r, rsc, PeterGo
CC=golang-dev
https://golang.org/cl/3057041

											
										
										
											2010-11-12 12:47:50 -08:00
+								}
-												bytes, strings: IndexOfAny
+ first use in go/doc

R=r
CC=golang-dev
https://golang.org/cl/781041

											
										
										
											2010-03-26 13:05:04 -07:00
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
+								// Execute f on each test case.  funcName should be the name of f; it's used
 								// in failure reports.
 								func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
-												more "declared and not used".

the last round omitted := range and only
checked 1 out of N vars in a multi-var :=

R=r
OCL=34624
CL=34638

											
										
										
											2009-09-15 09:41:59 -07:00
+									for _, test := range testCases {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										actual := f(test.s, test.sep)
-												style police: parens in if, for, switch, range

R=r
DELTA=32  (0 added, 3 deleted, 29 changed)
OCL=30718
CL=30725

											
										
										
											2009-06-24 20:12:50 -07:00
+										if actual != test.out {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
+										}
 									}
 								}
-												strings: add LastIndexAny

The need for a LastIndexAny function has come up in the discussion
for https://golang.org/cl/3008041/. This function is
implemented analogously to lastIndexFunc, using functions from
the utf8 package.

R=r, rsc, PeterGo
CC=golang-dev
https://golang.org/cl/3057041

											
										
										
											2010-11-12 12:47:50 -08:00
+								func TestIndex(t *testing.T)        { runIndexTests(t, Index, "Index", indexTests) }
 								func TestLastIndex(t *testing.T)    { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
 								func TestIndexAny(t *testing.T)     { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
 								func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) }
-												Basic HTTP client.

R=rsc
APPROVED=rsc
DELTA=392  (386 added, 2 deleted, 4 changed)
OCL=29963
CL=30107

											
										
										
											2009-06-09 10:58:58 -07:00
-												bytes, strings: add LastIndexByte

Currently the packages have the following index functions:

func Index(s, sep []byte) int
func IndexAny(s []byte, chars string) int
func IndexByte(s []byte, c byte) int
func IndexFunc(s []byte, f func(r rune) bool) int
func IndexRune(s []byte, r rune) int

func LastIndex(s, sep []byte) int
func LastIndexAny(s []byte, chars string) int
func LastIndexFunc(s []byte, f func(r rune) bool) int

Searching for the last occurrence of a byte is quite common
for string parsing algorithms (e.g. find the last paren on a line).
Also addition of LastIndexByte makes the set more orthogonal.

Change-Id: Ida168849acacf8e78dd70c1354bef9eac5effafe
Reviewed-on: https://go-review.googlesource.com/9500
Reviewed-by: Rob Pike <r@golang.org>

											
										
										
											2015-04-29 20:45:55 +03:00
+								func TestLastIndexByte(t *testing.T) {
 									testCases := []IndexTest{
 										{"", "q", -1},
 										{"abcdef", "q", -1},
 										{"abcdefabcdef", "a", len("abcdef")},      // something in the middle
 										{"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
 										{"zabcdefabcdef", "z", 0},                 // first byte
 										{"a☺b☻c☹d", "b", len("a☺")},               // non-ascii
 									}
 									for _, test := range testCases {
 										actual := LastIndexByte(test.s, test.sep[0])
 										if actual != test.out {
 											t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.s, test.sep[0], actual, test.out)
 										}
 									}
 								}
-												strings: fix and reenable amd64 Index for 17-31 byte strings

Fixes #15689

Change-Id: I56d0103738cc35cd5bc5e77a0e0341c0dd55530e
Reviewed-on: https://go-review.googlesource.com/23440
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>

											
										
										
											2016-05-25 16:33:19 +03:00
+								func simpleIndex(s, sep string) int {
 									n := len(sep)
 									for i := n; i <= len(s); i++ {
 										if s[i-n:i] == sep {
 											return i - n
 										}
 									}
 									return -1
 								}
 								func TestIndexRandom(t *testing.T) {
 									const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
 									for times := 0; times < 10; times++ {
 										for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary
 											s1 := make([]byte, strLen)
 											for i := range s1 {
 												s1[i] = chars[rand.Intn(len(chars))]
 											}
 											s := string(s1)
 											for i := 0; i < 50; i++ {
 												begin := rand.Intn(len(s) + 1)
 												end := begin + rand.Intn(len(s)+1-begin)
 												sep := s[begin:end]
 												if i%4 == 0 {
 													pos := rand.Intn(len(sep) + 1)
 													sep = sep[:pos] + "A" + sep[pos:]
 												}
 												want := simpleIndex(s, sep)
 												res := Index(s, sep)
 												if res != want {
 													t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want)
 												}
 											}
 										}
 									}
 								}
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+								func TestIndexRune(t *testing.T) {
-												bytes, strings: fix regression in IndexRune

In all previous versions of Go, the behavior of IndexRune(s, r)
where r was utf.RuneError was that it would effectively return the
index of any invalid UTF-8 byte sequence (include RuneError).
Optimizations made in http://golang.org/cl/28537 and
http://golang.org/cl/28546 altered this undocumented behavior such
that RuneError would only match on the RuneError rune itself.

Although, the new behavior is arguably reasonable, it did break code
that depended on the previous behavior. Thus, we add special checks
to ensure that we preserve the old behavior.

There is a slight performance hit for correctness:
	benchmark                   old ns/op     new ns/op     delta
	BenchmarkIndexRune/10-4     19.3          21.6          +11.92%
	BenchmarkIndexRune/32-4     33.6          35.2          +4.76%
This only occurs on small strings. The performance hit for larger strings
is neglible and not shown.

Fixes #17611

Change-Id: I1d863a741213d46c40b2e1724c41245df52502a5
Reviewed-on: https://go-review.googlesource.com/32123
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2016-10-26 14:18:37 -07:00
+									tests := []struct {
 										in   string
 										rune rune
 										want int
 									}{
 										{"", 'a', -1},
 										{"", '☺', -1},
 										{"foo", '☹', -1},
 										{"foo", 'o', 1},
 										{"foo☺bar", '☺', 3},
 										{"foo☺☻☹bar", '☹', 9},
 										{"a A x", 'A', 2},
 										{"some_text=some_value", '=', 9},
 										{"☺a", 'a', 3},
 										{"a☻☺b", '☺', 4},
 										// RuneError should match any invalid UTF-8 byte sequence.
 										{"<22>", '<27>', 0},
 										{"\xff", '<27>', 0},
 										{"☻x<E298BB>", '<27>', len("☻x")},
 										{"☻x\xe2\x98", '<27>', len("☻x")},
 										{"☻x\xe2\x98<39>", '<27>', len("☻x")},
 										{"☻x\xe2\x98x", '<27>', len("☻x")},
 										// Invalid rune values should never match.
 										{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", -1, -1},
 										{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
 										{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", utf8.MaxRune + 1, -1},
 									}
 									for _, tt := range tests {
 										if got := IndexRune(tt.in, tt.rune); got != tt.want {
 											t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+										}
 									}
-												strings: make IndexRune faster

re-implement IndexRune by Index which is well optimized to get
performance gain.

name                   old time/op  new time/op  delta
IndexRune-4            30.2ns ± 1%  28.3ns ± 1%   -6.22%  (p=0.000 n=20+19)
IndexRuneLongString-4   156ns ± 1%    49ns ± 1%  -68.72%  (p=0.000 n=19+19)
IndexRuneFastPath-4    10.6ns ± 2%  10.0ns ± 1%   -6.30%  (p=0.000 n=18+18)

Change-Id: Ie663b8f7860ca51892dd4be182fca3caa5f8ae61
Reviewed-on: https://go-review.googlesource.com/28546
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2016-09-06 20:23:40 +09:00
 									haystack := "test世界"
 									allocs := testing.AllocsPerRun(1000, func() {
 										if i := IndexRune(haystack, 's'); i != 2 {
 											t.Fatalf("'s' at %d; want 2", i)
 										}
 										if i := IndexRune(haystack, '世'); i != 4 {
 											t.Fatalf("'世' at %d; want 4", i)
 										}
 									})
-												strings: ignore allocation test in cover mode

Fixes #17699

Change-Id: I7ea29a3fc2ca13d9d7e3044cbb8ea22e3435d423
Reviewed-on: https://go-review.googlesource.com/32484
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Rob Pike <r@golang.org>

											
										
										
											2016-11-01 18:13:54 +00:00
+									if allocs != 0 && testing.CoverMode() == "" {
-												bytes, strings: fix regression in IndexRune

In all previous versions of Go, the behavior of IndexRune(s, r)
where r was utf.RuneError was that it would effectively return the
index of any invalid UTF-8 byte sequence (include RuneError).
Optimizations made in http://golang.org/cl/28537 and
http://golang.org/cl/28546 altered this undocumented behavior such
that RuneError would only match on the RuneError rune itself.

Although, the new behavior is arguably reasonable, it did break code
that depended on the previous behavior. Thus, we add special checks
to ensure that we preserve the old behavior.

There is a slight performance hit for correctness:
	benchmark                   old ns/op     new ns/op     delta
	BenchmarkIndexRune/10-4     19.3          21.6          +11.92%
	BenchmarkIndexRune/32-4     33.6          35.2          +4.76%
This only occurs on small strings. The performance hit for larger strings
is neglible and not shown.

Fixes #17611

Change-Id: I1d863a741213d46c40b2e1724c41245df52502a5
Reviewed-on: https://go-review.googlesource.com/32123
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2016-10-26 14:18:37 -07:00
+										t.Errorf("expected no allocations, got %f", allocs)
-												strings: make IndexRune faster

re-implement IndexRune by Index which is well optimized to get
performance gain.

name                   old time/op  new time/op  delta
IndexRune-4            30.2ns ± 1%  28.3ns ± 1%   -6.22%  (p=0.000 n=20+19)
IndexRuneLongString-4   156ns ± 1%    49ns ± 1%  -68.72%  (p=0.000 n=19+19)
IndexRuneFastPath-4    10.6ns ± 2%  10.0ns ± 1%   -6.30%  (p=0.000 n=18+18)

Change-Id: Ie663b8f7860ca51892dd4be182fca3caa5f8ae61
Reviewed-on: https://go-review.googlesource.com/28546
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2016-09-06 20:23:40 +09:00
+									}
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+								}
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+								const benchmarkString = "some_text=some☺value"
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+								func BenchmarkIndexRune(b *testing.B) {
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+									if got := IndexRune(benchmarkString, '☺'); got != 14 {
-												panics: use the new facilities of testing.B instead

Lots of panics go away.
Also fix a name error in html/template.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5498045

											
										
										
											2011-12-20 10:36:25 -08:00
+										b.Fatalf("wrong index: expected 14, got=%d", got)
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+									}
 									for i := 0; i < b.N; i++ {
 										IndexRune(benchmarkString, '☺')
 									}
 								}
-												strings: make IndexRune faster

re-implement IndexRune by Index which is well optimized to get
performance gain.

name                   old time/op  new time/op  delta
IndexRune-4            30.2ns ± 1%  28.3ns ± 1%   -6.22%  (p=0.000 n=20+19)
IndexRuneLongString-4   156ns ± 1%    49ns ± 1%  -68.72%  (p=0.000 n=19+19)
IndexRuneFastPath-4    10.6ns ± 2%  10.0ns ± 1%   -6.30%  (p=0.000 n=18+18)

Change-Id: Ie663b8f7860ca51892dd4be182fca3caa5f8ae61
Reviewed-on: https://go-review.googlesource.com/28546
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2016-09-06 20:23:40 +09:00
+								var benchmarkLongString = Repeat(" ", 100) + benchmarkString
 								func BenchmarkIndexRuneLongString(b *testing.B) {
 									if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
 										b.Fatalf("wrong index: expected 114, got=%d", got)
 									}
 									for i := 0; i < b.N; i++ {
 										IndexRune(benchmarkLongString, '☺')
 									}
 								}
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+								func BenchmarkIndexRuneFastPath(b *testing.B) {
 									if got := IndexRune(benchmarkString, 'v'); got != 17 {
-												panics: use the new facilities of testing.B instead

Lots of panics go away.
Also fix a name error in html/template.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5498045

											
										
										
											2011-12-20 10:36:25 -08:00
+										b.Fatalf("wrong index: expected 17, got=%d", got)
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+									}
 									for i := 0; i < b.N; i++ {
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+										IndexRune(benchmarkString, 'v')
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+									}
 								}
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+								func BenchmarkIndex(b *testing.B) {
 									if got := Index(benchmarkString, "v"); got != 17 {
-												panics: use the new facilities of testing.B instead

Lots of panics go away.
Also fix a name error in html/template.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5498045

											
										
										
											2011-12-20 10:36:25 -08:00
+										b.Fatalf("wrong index: expected 17, got=%d", got)
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+									}
 									for i := 0; i < b.N; i++ {
-												strings: better benchmark names; add BenchmarkIndex

R=dsymonds
CC=golang-dev
https://golang.org/cl/4264052

											
										
										
											2011-03-08 09:58:18 -08:00
+										Index(benchmarkString, "v")
-												strings: add IndexRune tests, ASCII fast path

$ gotest -test.v -test.run=IndexRune -test.bench=.*
=== RUN  strings_test.TestIndexRune
--- PASS: strings_test.TestIndexRune (0.0 seconds)
PASS
strings_test.BenchmarkIndexRune	20000000   105 ns/op
strings_test.BenchmarkIndexByte	50000000    48 ns/op

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4267050

											
										
										
											2011-03-08 09:41:12 -08:00
+									}
 								}
-												strings: use Rabin-Karp algorithm for LastIndex.

benchmark                  old ns/op     new ns/op     delta
BenchmarkSingleMatch       49443         52275         +5.73%
BenchmarkIndex             28.8          27.4          -4.86%
BenchmarkLastIndex         14.5          14.0          -3.45%
BenchmarkLastIndexHard1    3982782       2309200       -42.02%
BenchmarkLastIndexHard2    3985562       2287715       -42.60%
BenchmarkLastIndexHard3    3555259       2282866       -35.79%

LGTM=josharian, nigeltao
R=golang-codereviews, ality, josharian, bradfitz, dave, nigeltao, gobot, nightlyone
CC=golang-codereviews
https://golang.org/cl/102560043

											
										
										
											2014-09-01 17:47:57 +10:00
+								func BenchmarkLastIndex(b *testing.B) {
 									if got := Index(benchmarkString, "v"); got != 17 {
 										b.Fatalf("wrong index: expected 17, got=%d", got)
 									}
 									for i := 0; i < b.N; i++ {
 										LastIndex(benchmarkString, "v")
 									}
 								}
-												strings: add IndexByte benchmark

Like existing Index, IndexRune, IndexHardN, etc.

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12486044

											
										
										
											2013-08-06 14:41:07 -07:00
+								func BenchmarkIndexByte(b *testing.B) {
 									if got := IndexByte(benchmarkString, 'v'); got != 17 {
 										b.Fatalf("wrong index: expected 17, got=%d", got)
 									}
 									for i := 0; i < b.N; i++ {
 										IndexByte(benchmarkString, 'v')
 									}
 								}
-												delete export

TBR=r
OCL=23121
CL=23127

											
										
										
											2009-01-20 14:40:40 -08:00
+								type SplitTest struct {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									s   string
 									sep string
 									n   int
 									a   []string
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
 								var splittests = []SplitTest{
-												strings: improve explode and correct comment

Merges explodetests into splittests which already contain
some of the tests that cover explode.

Adds a test to cover the utf8.RuneError branch in explode.

name      old time/op  new time/op  delta
Split1-2  14.9ms ± 0%  14.2ms ± 0%  -4.06%  (p=0.000 n=47+49)

Change-Id: I00f796bd2edab70e926ea9e65439d820c6a28254
Reviewed-on: https://go-review.googlesource.com/21609
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>

											
										
										
											2016-03-26 00:04:48 +01:00
+									{"", "", -1, []string{}},
 									{abcd, "", 2, []string{"a", "bcd"}},
 									{abcd, "", 4, []string{"a", "b", "c", "d"}},
 									{abcd, "", -1, []string{"a", "b", "c", "d"}},
 									{faces, "", -1, []string{"☺", "☻", "☹"}},
 									{faces, "", 3, []string{"☺", "☻", "☹"}},
 									{faces, "", 17, []string{"☺", "☻", "☹"}},
 									{"☺<>☹", "", -1, []string{"☺", "<22>", "☹"}},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{abcd, "a", 0, nil},
 									{abcd, "a", -1, []string{"", "bcd"}},
 									{abcd, "z", -1, []string{"abcd"}},
 									{commas, ",", -1, []string{"1", "2", "3", "4"}},
 									{dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
 									{faces, "☹", -1, []string{"☺☻", ""}},
 									{faces, "~", -1, []string{faces}},
 									{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
 									{"1 2", " ", 3, []string{"1", "2"}},
-												Automated g4 rollback of changelist 25024,
plus significant hand editing.

Back to T{x} for composite literals.

R=r
OCL=25612
CL=25632

											
										
										
											2009-03-03 08:39:12 -08:00
+								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
-												delete export

TBR=r
OCL=23121
CL=23127

											
										
										
											2009-01-20 14:40:40 -08:00
+								func TestSplit(t *testing.T) {
-												Change strings.Split, bytes.Split to take a maximum substring count argument.

R=rsc
APPROVED=r
DELTA=131  (39 added, 10 deleted, 82 changed)
OCL=30669
CL=30723

											
										
										
											2009-06-24 19:02:29 -07:00
+									for _, tt := range splittests {
-												strings.Split: make the default to split all.
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.

R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051

											
										
										
											2011-06-28 09:43:14 +10:00
+										a := SplitN(tt.s, tt.sep, tt.n)
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+										if !eq(a, tt.a) {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+											t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
 											continue
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+										}
-												strings and bytes.Split: make count of 0 mean 0, not infinite.
Use a count of -1 for infinity.  Ditto for Replace.

R=rsc
CC=golang-dev
https://golang.org/cl/1704044

											
										
										
											2010-07-01 14:08:14 -07:00
+										if tt.n == 0 {
 											continue
 										}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										s := Join(a, tt.sep)
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+										if s != tt.s {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+										}
-												strings.Split: make the default to split all.
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.

R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051

											
										
										
											2011-06-28 09:43:14 +10:00
+										if tt.n < 0 {
 											b := Split(tt.s, tt.sep)
 											if !reflect.DeepEqual(a, b) {
 												t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
 											}
 										}
-												convert tests.
refine gotest's test selection criteria.

R=r
DELTA=1590  (745 added, 844 deleted, 1 changed)
OCL=19903
CL=19936

											
										
										
											2008-11-24 15:17:47 -08:00
+									}
 								}
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+								var splitaftertests = []SplitTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{abcd, "a", -1, []string{"a", "bcd"}},
 									{abcd, "z", -1, []string{"abcd"}},
 									{abcd, "", -1, []string{"a", "b", "c", "d"}},
 									{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
 									{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
 									{faces, "☹", -1, []string{"☺☻☹", ""}},
 									{faces, "~", -1, []string{faces}},
 									{faces, "", -1, []string{"☺", "☻", "☹"}},
 									{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
 									{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
 									{"1 2", " ", 3, []string{"1 ", "2"}},
 									{"123", "", 2, []string{"1", "23"}},
 									{"123", "", 17, []string{"1", "2", "3"}},
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+								}
 								func TestSplitAfter(t *testing.T) {
 									for _, tt := range splitaftertests {
-												strings.Split: make the default to split all.
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.

R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051

											
										
										
											2011-06-28 09:43:14 +10:00
+										a := SplitAfterN(tt.s, tt.sep, tt.n)
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+										if !eq(a, tt.a) {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+											t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
 											continue
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+										}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										s := Join(a, "")
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+										if s != tt.s {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+										}
-												strings.Split: make the default to split all.
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.

R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051

											
										
										
											2011-06-28 09:43:14 +10:00
+										if tt.n < 0 {
 											b := SplitAfter(tt.s, tt.sep)
 											if !reflect.DeepEqual(a, b) {
 												t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
 											}
 										}
-												bytes.SplitAfter and strings.SplitAfter
most common usage is:

	lines := strings.SplitAfter(text, "\n", 0)

R=r
http://go/go-review/1018042

											
										
										
											2009-11-04 15:19:30 -08:00
+									}
 								}
-												bytes, strings: add new function Fields

R=rsc, r, phf
CC=golang-dev
https://golang.org/cl/170046

											
										
										
											2009-12-15 21:09:55 -08:00
+								type FieldsTest struct {
 									s string
 									a []string
 								}
 								var fieldstests = []FieldsTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", []string{}},
 									{" ", []string{}},
 									{" \t ", []string{}},
-												strings: speed up Fields

- use a string lookup to detect if a single byte is a space character
- determine the exact number of fields for ASCII and
  a possibly underestimated number of fields for non ASCII strings
  by doing a separate byte for byte scan of the input string
  before collecting the fields in an extra pass
- provide a fast path for ASCII only strings when collecting the fields
- avoid utf8.DecodeRuneInString and unicode.IsSpace for ASCII characters

Used golang.org/cl/33108 from Joe Tsai as starting point.

name                      old time/op    new time/op     delta
Fields/ASCII/16              284ns ± 1%      116ns ± 2%   -59.30%  (p=0.000 n=9+10)
Fields/ASCII/256            3.81µs ± 1%     0.80µs ± 1%   -79.10%  (p=0.000 n=10+10)
Fields/ASCII/4096           61.4µs ± 1%     12.3µs ± 1%   -79.96%  (p=0.000 n=10+9)
Fields/ASCII/65536           982µs ± 1%      235µs ± 0%   -76.04%  (p=0.000 n=10+9)
Fields/ASCII/1048576        16.7ms ± 2%      5.4ms ± 1%   -67.52%  (p=0.000 n=10+10)
Fields/Mixed/16              314ns ± 1%      168ns ± 1%   -46.33%  (p=0.000 n=9+10)
Fields/Mixed/256            3.92µs ± 1%     1.17µs ± 1%   -70.19%  (p=0.000 n=10+10)
Fields/Mixed/4096           69.1µs ± 1%     19.0µs ± 1%   -72.53%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.12ms ± 1%     0.39ms ± 0%   -65.37%  (p=0.000 n=10+9)
Fields/Mixed/1048576        19.0ms ± 2%      7.3ms ± 4%   -61.75%  (p=0.000 n=10+9)

name                      old speed      new speed       delta
Fields/ASCII/16           56.3MB/s ± 1%  138.1MB/s ± 2%  +145.31%  (p=0.000 n=9+10)
Fields/ASCII/256          67.1MB/s ± 1%  321.0MB/s ± 1%  +378.26%  (p=0.000 n=10+10)
Fields/ASCII/4096         66.7MB/s ± 1%  333.0MB/s ± 1%  +398.97%  (p=0.000 n=10+9)
Fields/ASCII/65536        66.7MB/s ± 1%  278.4MB/s ± 0%  +317.39%  (p=0.000 n=10+9)
Fields/ASCII/1048576      62.7MB/s ± 2%  192.9MB/s ± 1%  +207.82%  (p=0.000 n=10+10)
Fields/Mixed/16           51.0MB/s ± 2%   94.9MB/s ± 1%   +85.87%  (p=0.000 n=10+10)
Fields/Mixed/256          65.4MB/s ± 1%  219.2MB/s ± 1%  +235.33%  (p=0.000 n=10+10)
Fields/Mixed/4096         59.3MB/s ± 1%  215.7MB/s ± 1%  +263.98%  (p=0.000 n=10+10)
Fields/Mixed/65536        58.6MB/s ± 1%  169.1MB/s ± 0%  +188.73%  (p=0.000 n=10+9)
Fields/Mixed/1048576      55.1MB/s ± 2%  144.0MB/s ± 4%  +161.44%  (p=0.000 n=10+9)

Updates #19789
Updates #17856

Change-Id: If2ce1479542702e9cd65a82a462ba55ac8eb3876
Reviewed-on: https://go-review.googlesource.com/37959
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-03-06 09:34:39 +01:00
+									{"\u2000", []string{}},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"  abc  ", []string{"abc"}},
 									{"1 2 3 4", []string{"1", "2", "3", "4"}},
 									{"1  2  3  4", []string{"1", "2", "3", "4"}},
 									{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
 									{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
 									{"\u2000\u2001\u2002", []string{}},
 									{"\n™\t™\n", []string{"™", "™"}},
-												strings: speed up Fields

- use a string lookup to detect if a single byte is a space character
- determine the exact number of fields for ASCII and
  a possibly underestimated number of fields for non ASCII strings
  by doing a separate byte for byte scan of the input string
  before collecting the fields in an extra pass
- provide a fast path for ASCII only strings when collecting the fields
- avoid utf8.DecodeRuneInString and unicode.IsSpace for ASCII characters

Used golang.org/cl/33108 from Joe Tsai as starting point.

name                      old time/op    new time/op     delta
Fields/ASCII/16              284ns ± 1%      116ns ± 2%   -59.30%  (p=0.000 n=9+10)
Fields/ASCII/256            3.81µs ± 1%     0.80µs ± 1%   -79.10%  (p=0.000 n=10+10)
Fields/ASCII/4096           61.4µs ± 1%     12.3µs ± 1%   -79.96%  (p=0.000 n=10+9)
Fields/ASCII/65536           982µs ± 1%      235µs ± 0%   -76.04%  (p=0.000 n=10+9)
Fields/ASCII/1048576        16.7ms ± 2%      5.4ms ± 1%   -67.52%  (p=0.000 n=10+10)
Fields/Mixed/16              314ns ± 1%      168ns ± 1%   -46.33%  (p=0.000 n=9+10)
Fields/Mixed/256            3.92µs ± 1%     1.17µs ± 1%   -70.19%  (p=0.000 n=10+10)
Fields/Mixed/4096           69.1µs ± 1%     19.0µs ± 1%   -72.53%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.12ms ± 1%     0.39ms ± 0%   -65.37%  (p=0.000 n=10+9)
Fields/Mixed/1048576        19.0ms ± 2%      7.3ms ± 4%   -61.75%  (p=0.000 n=10+9)

name                      old speed      new speed       delta
Fields/ASCII/16           56.3MB/s ± 1%  138.1MB/s ± 2%  +145.31%  (p=0.000 n=9+10)
Fields/ASCII/256          67.1MB/s ± 1%  321.0MB/s ± 1%  +378.26%  (p=0.000 n=10+10)
Fields/ASCII/4096         66.7MB/s ± 1%  333.0MB/s ± 1%  +398.97%  (p=0.000 n=10+9)
Fields/ASCII/65536        66.7MB/s ± 1%  278.4MB/s ± 0%  +317.39%  (p=0.000 n=10+9)
Fields/ASCII/1048576      62.7MB/s ± 2%  192.9MB/s ± 1%  +207.82%  (p=0.000 n=10+10)
Fields/Mixed/16           51.0MB/s ± 2%   94.9MB/s ± 1%   +85.87%  (p=0.000 n=10+10)
Fields/Mixed/256          65.4MB/s ± 1%  219.2MB/s ± 1%  +235.33%  (p=0.000 n=10+10)
Fields/Mixed/4096         59.3MB/s ± 1%  215.7MB/s ± 1%  +263.98%  (p=0.000 n=10+10)
Fields/Mixed/65536        58.6MB/s ± 1%  169.1MB/s ± 0%  +188.73%  (p=0.000 n=10+9)
Fields/Mixed/1048576      55.1MB/s ± 2%  144.0MB/s ± 4%  +161.44%  (p=0.000 n=10+9)

Updates #19789
Updates #17856

Change-Id: If2ce1479542702e9cd65a82a462ba55ac8eb3876
Reviewed-on: https://go-review.googlesource.com/37959
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-03-06 09:34:39 +01:00
+									{"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
 									{"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
 									{"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{faces, []string{faces}},
-												bytes, strings: add new function Fields

R=rsc, r, phf
CC=golang-dev
https://golang.org/cl/170046

											
										
										
											2009-12-15 21:09:55 -08:00
+								}
 								func TestFields(t *testing.T) {
 									for _, tt := range fieldstests {
 										a := Fields(tt.s)
 										if !eq(a, tt.a) {
 											t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
 											continue
 										}
 									}
 								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var FieldsFuncTests = []FieldsTest{
 									{"", []string{}},
 									{"XX", []string{}},
 									{"XXhiXXX", []string{"hi"}},
 									{"aXXbXXXcX", []string{"a", "b", "c"}},
 								}
-												Added strings.FieldsFunc, a generalization of strings.Fields in style of the strings.Trim*Func functions.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/824051

											
										
										
											2010-04-19 16:36:50 -07:00
+								func TestFieldsFunc(t *testing.T) {
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
+									for _, tt := range fieldstests {
 										a := FieldsFunc(tt.s, unicode.IsSpace)
 										if !eq(a, tt.a) {
 											t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
 											continue
 										}
 									}
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									pred := func(c rune) bool { return c == 'X' }
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+									for _, tt := range FieldsFuncTests {
-												Added strings.FieldsFunc, a generalization of strings.Fields in style of the strings.Trim*Func functions.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/824051

											
										
										
											2010-04-19 16:36:50 -07:00
+										a := FieldsFunc(tt.s, pred)
 										if !eq(a, tt.a) {
 											t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
 										}
 									}
 								}
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
+								// Test case for any function which accepts and returns a single string.
 								type StringTest struct {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									in, out string
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
+								}
 								// Execute f on each test case.  funcName should be the name of f; it's used
 								// in failure reports.
 								func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
-												more "declared and not used".

the last round omitted := range and only
checked 1 out of N vars in a multi-var :=

R=r
OCL=34624
CL=34638

											
										
										
											2009-09-15 09:41:59 -07:00
+									for _, tc := range testCases {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										actual := f(tc.in)
-												style police: parens in if, for, switch, range

R=r
DELTA=32  (0 added, 3 deleted, 29 changed)
OCL=30718
CL=30725

											
										
										
											2009-06-24 20:12:50 -07:00
+										if actual != tc.out {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
+										}
 									}
 								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
+								var upperTests = []StringTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", ""},
-												strings: optimize ToUpper

Handling the ASCII case inline and call unicode.ToUpper only
for non-ascii cases.

Gives good improvements for the ascii case and minor perf
degrade for non-ascii case

name                                     old time/op    new time/op    delta
ToUpper/#00                                11.7ns ± 8%     8.0ns ± 1%  -31.95%  (p=0.008 n=5+5)
ToUpper/ONLYUPPER                          45.6ns ± 5%    19.9ns ± 1%  -56.40%  (p=0.008 n=5+5)
ToUpper/abc                                77.4ns ± 1%    57.0ns ± 1%  -26.32%  (p=0.008 n=5+5)
ToUpper/AbC123                             92.1ns ± 4%    67.7ns ± 2%  -26.57%  (p=0.008 n=5+5)
ToUpper/azAZ09_                             105ns ± 6%      67ns ± 2%  -36.26%  (p=0.000 n=5+4)
ToUpper/longStrinGwitHmixofsmaLLandcAps     255ns ± 1%     140ns ± 1%  -45.01%  (p=0.029 n=4+4)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     440ns ± 1%     447ns ± 0%   +1.49%  (p=0.016 n=5+4)
ToUpper/ɐɐɐɐɐ                               370ns ± 4%     366ns ± 1%     ~     (p=0.667 n=5+5)

name                                     old alloc/op   new alloc/op   delta
ToUpper/#00                                 0.00B          0.00B          ~     (all equal)
ToUpper/ONLYUPPER                           0.00B          0.00B          ~     (all equal)
ToUpper/abc                                 16.0B ± 0%      6.0B ± 0%  -62.50%  (p=0.008 n=5+5)
ToUpper/AbC123                              16.0B ± 0%     16.0B ± 0%     ~     (all equal)
ToUpper/azAZ09_                             24.0B ± 0%     16.0B ± 0%  -33.33%  (p=0.008 n=5+5)
ToUpper/longStrinGwitHmixofsmaLLandcAps     80.0B ± 0%     64.0B ± 0%  -20.00%  (p=0.008 n=5+5)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     96.0B ± 0%     96.0B ± 0%     ~     (all equal)
ToUpper/ɐɐɐɐɐ                               64.0B ± 0%     64.0B ± 0%     ~     (all equal)

Ran on a machine with Intel(R) Core(TM) i5-5200U CPU @ 2.20GHz

Updates #17859

Change-Id: I0735ac4a4a36e8a8f6cc06f2c16b871f12b4abf9
Reviewed-on: https://go-review.googlesource.com/68370
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-09-28 15:07:48 +05:30
+									{"ONLYUPPER", "ONLYUPPER"},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"abc", "ABC"},
 									{"AbC123", "ABC123"},
 									{"azAZ09_", "AZAZ09_"},
-												strings: optimize ToUpper

Handling the ASCII case inline and call unicode.ToUpper only
for non-ascii cases.

Gives good improvements for the ascii case and minor perf
degrade for non-ascii case

name                                     old time/op    new time/op    delta
ToUpper/#00                                11.7ns ± 8%     8.0ns ± 1%  -31.95%  (p=0.008 n=5+5)
ToUpper/ONLYUPPER                          45.6ns ± 5%    19.9ns ± 1%  -56.40%  (p=0.008 n=5+5)
ToUpper/abc                                77.4ns ± 1%    57.0ns ± 1%  -26.32%  (p=0.008 n=5+5)
ToUpper/AbC123                             92.1ns ± 4%    67.7ns ± 2%  -26.57%  (p=0.008 n=5+5)
ToUpper/azAZ09_                             105ns ± 6%      67ns ± 2%  -36.26%  (p=0.000 n=5+4)
ToUpper/longStrinGwitHmixofsmaLLandcAps     255ns ± 1%     140ns ± 1%  -45.01%  (p=0.029 n=4+4)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     440ns ± 1%     447ns ± 0%   +1.49%  (p=0.016 n=5+4)
ToUpper/ɐɐɐɐɐ                               370ns ± 4%     366ns ± 1%     ~     (p=0.667 n=5+5)

name                                     old alloc/op   new alloc/op   delta
ToUpper/#00                                 0.00B          0.00B          ~     (all equal)
ToUpper/ONLYUPPER                           0.00B          0.00B          ~     (all equal)
ToUpper/abc                                 16.0B ± 0%      6.0B ± 0%  -62.50%  (p=0.008 n=5+5)
ToUpper/AbC123                              16.0B ± 0%     16.0B ± 0%     ~     (all equal)
ToUpper/azAZ09_                             24.0B ± 0%     16.0B ± 0%  -33.33%  (p=0.008 n=5+5)
ToUpper/longStrinGwitHmixofsmaLLandcAps     80.0B ± 0%     64.0B ± 0%  -20.00%  (p=0.008 n=5+5)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     96.0B ± 0%     96.0B ± 0%     ~     (all equal)
ToUpper/ɐɐɐɐɐ                               64.0B ± 0%     64.0B ± 0%     ~     (all equal)

Ran on a machine with Intel(R) Core(TM) i5-5200U CPU @ 2.20GHz

Updates #17859

Change-Id: I0735ac4a4a36e8a8f6cc06f2c16b871f12b4abf9
Reviewed-on: https://go-review.googlesource.com/68370
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-09-28 15:07:48 +05:30
+									{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
 									{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
-												strings: fix encoding of \u0080 in map

Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf
being incorrectly encoded as \x80 in strings.Map due to using <= instead
of a < comparison operator to check one byte encodings for utf8.

Fixes #25242

Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-05-04 06:54:18 +02:00
+									{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"},                           // test utf8.RuneSelf and utf8.MaxRune
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
+								}
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
+								var lowerTests = []StringTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", ""},
 									{"abc", "abc"},
 									{"AbC123", "abc123"},
 									{"azAZ09_", "azaz09_"},
-												strings: optimize ToLower

Handling the ASCII case inline and call unicode.ToLower only
for non-ASCII cases.

Gives good improvements for the ASCII case and minor perf
degrade for non-ASCII case

name                                     old time/op    new time/op    delta
ToLower/#00                                10.8ns ± 1%     9.0ns ± 1%  -16.83%  (p=0.008 n=5+5)
ToLower/abc                                23.3ns ± 4%    12.6ns ± 1%  -46.01%  (p=0.008 n=5+5)
ToLower/AbC123                             91.0ns ± 2%    70.4ns ± 0%  -22.59%  (p=0.008 n=5+5)
ToLower/azAZ09_                             104ns ± 3%      75ns ± 1%  -28.35%  (p=0.008 n=5+5)
ToLower/longStrinGwitHmixofsmaLLandcAps     254ns ± 4%     157ns ± 0%  -38.19%  (p=0.016 n=5+4)
ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS     446ns ± 1%     451ns ± 1%     ~     (p=0.056 n=5+5)
ToLower/ⱭⱭⱭⱭⱭ                               345ns ± 1%     348ns ± 0%   +0.93%  (p=0.016 n=5+5)

name                                     old alloc/op   new alloc/op   delta
ToLower/#00                                 0.00B          0.00B          ~     (all equal)
ToLower/abc                                 0.00B          0.00B          ~     (all equal)
ToLower/AbC123                              16.0B ± 0%     16.0B ± 0%     ~     (all equal)
ToLower/azAZ09_                             24.0B ± 0%     16.0B ± 0%  -33.33%  (p=0.008 n=5+5)
ToLower/longStrinGwitHmixofsmaLLandcAps     80.0B ± 0%     64.0B ± 0%  -20.00%  (p=0.008 n=5+5)
ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS     96.0B ± 0%     96.0B ± 0%     ~     (all equal)
ToLower/ⱭⱭⱭⱭⱭ                               48.0B ± 0%     48.0B ± 0%     ~     (all equal)

Ran on a machine with Intel(R) Core(TM) i5-5200U CPU @ 2.20GHz

Fixes #17859

Change-Id: Iacc1e6b77e1aedba9447a6e94352606f131ea597
Reviewed-on: https://go-review.googlesource.com/76470
Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-11-08 09:15:53 +05:30
+									{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
 									{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
-												strings: fix encoding of \u0080 in map

Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf
being incorrectly encoded as \x80 in strings.Map due to using <= instead
of a < comparison operator to check one byte encodings for utf8.

Fixes #25242

Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-05-04 06:54:18 +02:00
+									{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"},                           // test utf8.RuneSelf and utf8.MaxRune
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
+								}
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
-												apply gofmt to rand reflect regexp rpc runtime sort strconv strings sync syscall testing time unicode unsafe utf8

R=gri
DELTA=1409  (79 added, 24 deleted, 1306 changed)
OCL=35415
CL=35437

											
										
										
											2009-10-07 11:55:06 -07:00
+								var trimSpaceTests = []StringTest{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", ""},
 									{"abc", "abc"},
 									{space + "abc" + space, "abc"},
 									{" ", ""},
 									{" \t\r\n \t\t\r\r\n\n ", ""},
 									{" \t\r\n x\t\t\r\r\n\n ", "x"},
 									{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
 									{"1 \t\r\n2", "1 \t\r\n2"},
 									{" x\x80", "x\x80"},
 									{" x\xc0", "x\xc0"},
 									{"x \xc0\xc0 ", "x \xc0\xc0"},
 									{"x \xc0", "x \xc0"},
 									{"x \xc0 ", "x \xc0"},
 									{"x \xc0\xc0 ", "x \xc0\xc0"},
 									{"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
 									{"x ☺ ", "x ☺"},
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								}
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+								func tenRunes(ch rune) string {
 									r := make([]rune, 10)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									for i := range r {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+										r[i] = ch
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									return string(r)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								}
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
+								// User-defined self-inverse mapping function
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+								func rot13(r rune) rune {
 									step := rune(13)
 									if r >= 'a' && r <= 'z' {
 										return ((r - 'a' + step) % 26) + 'a'
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
+									}
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									if r >= 'A' && r <= 'Z' {
 										return ((r - 'A' + step) % 26) + 'A'
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
+									}
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									return r
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
+								}
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								func TestMap(t *testing.T) {
 									// Run a couple of awful growth/shrinkage tests
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									a := tenRunes('a')
-												all: single space after period.

The tree's pretty inconsistent about single space vs double space
after a period in documentation. Make it consistently a single space,
per earlier decisions. This means contributors won't be confused by
misleading precedence.

This CL doesn't use go/doc to parse. It only addresses // comments.
It was generated with:

$ perl -i -npe 's,^(\s*// .+[a-z]\.)  +([A-Z]),$1 $2,' $(git grep -l -E '^\s*//(.+\.)  +([A-Z])')
$ go test go/doc -update

Change-Id: Iccdb99c37c797ef1f804a94b22ba5ee4b500c4f7
Reviewed-on: https://go-review.googlesource.com/20022
Reviewed-by: Rob Pike <r@golang.org>
Reviewed-by: Dave Day <djd@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2016-03-01 23:21:55 +00:00
+									// 1.  Grow. This triggers two reallocations in Map.
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									maxRune := func(rune) rune { return unicode.MaxRune }
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									m := Map(maxRune, a)
 									expect := tenRunes(unicode.MaxRune)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									if m != expect {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										t.Errorf("growing: expected %q got %q", expect, m)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									// 2. Shrink
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									minRune := func(rune) rune { return 'a' }
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									m = Map(minRune, tenRunes(unicode.MaxRune))
 									expect = a
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									if m != expect {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										t.Errorf("shrinking: expected %q got %q", expect, m)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
 									// 3. Rot13
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									m = Map(rot13, "a to zed")
 									expect = "n gb mrq"
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
+									if m != expect {
 										t.Errorf("rot13: expected %q got %q", expect, m)
 									}
 									// 4. Rot13^2
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									m = Map(rot13, Map(rot13, "a to zed"))
 									expect = "a to zed"
-												fix bug in bytes.Map and add test cases for Map in both strings and bytes packages.
thanks to ulrik.sverdrup for the test case.

Fixes #191.

R=rsc
CC=golang-dev
https://golang.org/cl/155056

											
										
										
											2009-11-15 12:07:27 -08:00
+									if m != expect {
 										t.Errorf("rot13: expected %q got %q", expect, m)
 									}
-												bytes, strings: allow -1 in Map to mean "drop this character".

xml: drop invalid characters in attribute names
    when constructing struct field names.

R=rsc
CC=r
https://golang.org/cl/157104

											
										
										
											2009-12-11 10:37:48 -08:00
 									// 5. Drop
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									dropNotLatin := func(r rune) rune {
 										if unicode.Is(unicode.Latin, r) {
 											return r
-												bytes, strings: allow -1 in Map to mean "drop this character".

xml: drop invalid characters in attribute names
    when constructing struct field names.

R=rsc
CC=r
https://golang.org/cl/157104

											
										
										
											2009-12-11 10:37:48 -08:00
+										}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										return -1
 									}
 									m = Map(dropNotLatin, "Hello, 세계")
 									expect = "Hello"
-												bytes, strings: allow -1 in Map to mean "drop this character".

xml: drop invalid characters in attribute names
    when constructing struct field names.

R=rsc
CC=r
https://golang.org/cl/157104

											
										
										
											2009-12-11 10:37:48 -08:00
+									if m != expect {
 										t.Errorf("drop: expected %q got %q", expect, m)
 									}
-												strings: Map: avoid allocation when string is unchanged

This speeds up strings.ToLower, etc.

before/after:
strings_test.BenchmarkMapNoChanges 1000000 1013 ns/op
strings_test.BenchmarkMapNoChanges 5000000  442 ns/op

R=r, rog, eh, rsc
CC=golang-dev
https://golang.org/cl/4306056

											
										
										
											2011-03-28 09:41:57 -07:00
 									// 6. Identity
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									identity := func(r rune) rune {
 										return r
-												strings: Map: avoid allocation when string is unchanged

This speeds up strings.ToLower, etc.

before/after:
strings_test.BenchmarkMapNoChanges 1000000 1013 ns/op
strings_test.BenchmarkMapNoChanges 5000000  442 ns/op

R=r, rog, eh, rsc
CC=golang-dev
https://golang.org/cl/4306056

											
										
										
											2011-03-28 09:41:57 -07:00
+									}
 									orig := "Input string that we expect not to be copied."
 									m = Map(identity, orig)
 									if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data !=
 										(*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
 										t.Error("unexpected copy during identity map")
 									}
-												strings: fix handling of invalid UTF-8 sequences in Map

The new Map implementation introduced in golang.org/cl/33201
did not differentiate if an invalid UTF-8 sequence was decoded
or the RuneError rune. It would therefore always advance by
3 bytes (which is the length of the RuneError rune) instead
of 1 for an invalid sequences. This cl adds a check to correctly
determine the length of bytes needed to advance to the next rune.

Fixes #19330.

Change-Id: I1e7f9333f3ef6068ffc64015bb0a9f32b0b7111d
Reviewed-on: https://go-review.googlesource.com/37597
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2017-02-28 21:21:45 +01:00
 									// 7. Handle invalid UTF-8 sequence
 									replaceNotLatin := func(r rune) rune {
 										if unicode.Is(unicode.Latin, r) {
 											return r
 										}
 										return '?'
 									}
 									m = Map(replaceNotLatin, "Hello\255World")
 									expect = "Hello?World"
 									if m != expect {
 										t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
 									}
-												strings: fix encoding of \u0080 in map

Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf
being incorrectly encoded as \x80 in strings.Map due to using <= instead
of a < comparison operator to check one byte encodings for utf8.

Fixes #25242

Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-05-04 06:54:18 +02:00
 									// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
 									encode := func(r rune) rune {
 										switch r {
 										case utf8.RuneSelf:
 											return unicode.MaxRune
 										case unicode.MaxRune:
 											return utf8.RuneSelf
 										}
 										return r
 									}
 									s := string(utf8.RuneSelf) + string(utf8.MaxRune)
 									r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
 									m = Map(encode, s)
 									if m != r {
 										t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
 									}
 									m = Map(encode, r)
 									if m != s {
 										t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
 									}
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+								func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+								func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
-												strings: optimize ToUpper

Handling the ASCII case inline and call unicode.ToUpper only
for non-ascii cases.

Gives good improvements for the ascii case and minor perf
degrade for non-ascii case

name                                     old time/op    new time/op    delta
ToUpper/#00                                11.7ns ± 8%     8.0ns ± 1%  -31.95%  (p=0.008 n=5+5)
ToUpper/ONLYUPPER                          45.6ns ± 5%    19.9ns ± 1%  -56.40%  (p=0.008 n=5+5)
ToUpper/abc                                77.4ns ± 1%    57.0ns ± 1%  -26.32%  (p=0.008 n=5+5)
ToUpper/AbC123                             92.1ns ± 4%    67.7ns ± 2%  -26.57%  (p=0.008 n=5+5)
ToUpper/azAZ09_                             105ns ± 6%      67ns ± 2%  -36.26%  (p=0.000 n=5+4)
ToUpper/longStrinGwitHmixofsmaLLandcAps     255ns ± 1%     140ns ± 1%  -45.01%  (p=0.029 n=4+4)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     440ns ± 1%     447ns ± 0%   +1.49%  (p=0.016 n=5+4)
ToUpper/ɐɐɐɐɐ                               370ns ± 4%     366ns ± 1%     ~     (p=0.667 n=5+5)

name                                     old alloc/op   new alloc/op   delta
ToUpper/#00                                 0.00B          0.00B          ~     (all equal)
ToUpper/ONLYUPPER                           0.00B          0.00B          ~     (all equal)
ToUpper/abc                                 16.0B ± 0%      6.0B ± 0%  -62.50%  (p=0.008 n=5+5)
ToUpper/AbC123                              16.0B ± 0%     16.0B ± 0%     ~     (all equal)
ToUpper/azAZ09_                             24.0B ± 0%     16.0B ± 0%  -33.33%  (p=0.008 n=5+5)
ToUpper/longStrinGwitHmixofsmaLLandcAps     80.0B ± 0%     64.0B ± 0%  -20.00%  (p=0.008 n=5+5)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     96.0B ± 0%     96.0B ± 0%     ~     (all equal)
ToUpper/ɐɐɐɐɐ                               64.0B ± 0%     64.0B ± 0%     ~     (all equal)

Ran on a machine with Intel(R) Core(TM) i5-5200U CPU @ 2.20GHz

Updates #17859

Change-Id: I0735ac4a4a36e8a8f6cc06f2c16b871f12b4abf9
Reviewed-on: https://go-review.googlesource.com/68370
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-09-28 15:07:48 +05:30
+								func BenchmarkToUpper(b *testing.B) {
 									for _, tc := range upperTests {
 										b.Run(tc.in, func(b *testing.B) {
 											for i := 0; i < b.N; i++ {
 												actual := ToUpper(tc.in)
 												if actual != tc.out {
 													b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
 												}
 											}
 										})
 									}
 								}
-												strings: optimize ToLower

Handling the ASCII case inline and call unicode.ToLower only
for non-ASCII cases.

Gives good improvements for the ASCII case and minor perf
degrade for non-ASCII case

name                                     old time/op    new time/op    delta
ToLower/#00                                10.8ns ± 1%     9.0ns ± 1%  -16.83%  (p=0.008 n=5+5)
ToLower/abc                                23.3ns ± 4%    12.6ns ± 1%  -46.01%  (p=0.008 n=5+5)
ToLower/AbC123                             91.0ns ± 2%    70.4ns ± 0%  -22.59%  (p=0.008 n=5+5)
ToLower/azAZ09_                             104ns ± 3%      75ns ± 1%  -28.35%  (p=0.008 n=5+5)
ToLower/longStrinGwitHmixofsmaLLandcAps     254ns ± 4%     157ns ± 0%  -38.19%  (p=0.016 n=5+4)
ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS     446ns ± 1%     451ns ± 1%     ~     (p=0.056 n=5+5)
ToLower/ⱭⱭⱭⱭⱭ                               345ns ± 1%     348ns ± 0%   +0.93%  (p=0.016 n=5+5)

name                                     old alloc/op   new alloc/op   delta
ToLower/#00                                 0.00B          0.00B          ~     (all equal)
ToLower/abc                                 0.00B          0.00B          ~     (all equal)
ToLower/AbC123                              16.0B ± 0%     16.0B ± 0%     ~     (all equal)
ToLower/azAZ09_                             24.0B ± 0%     16.0B ± 0%  -33.33%  (p=0.008 n=5+5)
ToLower/longStrinGwitHmixofsmaLLandcAps     80.0B ± 0%     64.0B ± 0%  -20.00%  (p=0.008 n=5+5)
ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS     96.0B ± 0%     96.0B ± 0%     ~     (all equal)
ToLower/ⱭⱭⱭⱭⱭ                               48.0B ± 0%     48.0B ± 0%     ~     (all equal)

Ran on a machine with Intel(R) Core(TM) i5-5200U CPU @ 2.20GHz

Fixes #17859

Change-Id: Iacc1e6b77e1aedba9447a6e94352606f131ea597
Reviewed-on: https://go-review.googlesource.com/76470
Reviewed-by: Marvin Stenger <marvin.stenger94@gmail.com>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-11-08 09:15:53 +05:30
+								func BenchmarkToLower(b *testing.B) {
 									for _, tc := range lowerTests {
 										b.Run(tc.in, func(b *testing.B) {
 											for i := 0; i < b.N; i++ {
 												actual := ToLower(tc.in)
 												if actual != tc.out {
 													b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
 												}
 											}
 										})
 									}
 								}
-												strings: Map: avoid allocation when string is unchanged

This speeds up strings.ToLower, etc.

before/after:
strings_test.BenchmarkMapNoChanges 1000000 1013 ns/op
strings_test.BenchmarkMapNoChanges 5000000  442 ns/op

R=r, rog, eh, rsc
CC=golang-dev
https://golang.org/cl/4306056

											
										
										
											2011-03-28 09:41:57 -07:00
+								func BenchmarkMapNoChanges(b *testing.B) {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									identity := func(r rune) rune {
 										return r
-												strings: Map: avoid allocation when string is unchanged

This speeds up strings.ToLower, etc.

before/after:
strings_test.BenchmarkMapNoChanges 1000000 1013 ns/op
strings_test.BenchmarkMapNoChanges 5000000  442 ns/op

R=r, rog, eh, rsc
CC=golang-dev
https://golang.org/cl/4306056

											
										
										
											2011-03-28 09:41:57 -07:00
+									}
 									for i := 0; i < b.N; i++ {
 										Map(identity, "Some string that won't be modified.")
 									}
 								}
-												Unicode: provide an ability to supplement the case-mapping tables
in character and string case mapping routines.

Add a custom mapper for Turkish and Azeri.

A more general solution for deriving the case information from Unicode's
SpecialCasing.txt will require more work.

Fixes #703.

R=rsc, rsc1
CC=golang-dev, mdakin
https://golang.org/cl/824043

											
										
										
											2010-03-30 17:51:03 -07:00
+								func TestSpecialCase(t *testing.T) {
 									lower := "abcçdefgğhıijklmnoöprsştuüvyz"
 									upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
 									u := ToUpperSpecial(unicode.TurkishCase, upper)
 									if u != upper {
 										t.Errorf("Upper(upper) is %s not %s", u, upper)
 									}
 									u = ToUpperSpecial(unicode.TurkishCase, lower)
 									if u != upper {
 										t.Errorf("Upper(lower) is %s not %s", u, upper)
 									}
 									l := ToLowerSpecial(unicode.TurkishCase, lower)
 									if l != lower {
 										t.Errorf("Lower(lower) is %s not %s", l, lower)
 									}
 									l = ToLowerSpecial(unicode.TurkishCase, upper)
 									if l != lower {
 										t.Errorf("Lower(upper) is %s not %s", l, lower)
 									}
 								}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+								func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var trimTests = []struct {
-												bytes, strings: add TrimPrefix and TrimSuffix

Everybody either gets confused and thinks this is
TrimLeft/TrimRight or does this by hand which gets
repetitive looking.

R=rsc, kevlar
CC=golang-dev
https://golang.org/cl/7239044

											
										
										
											2013-02-01 08:41:25 -08:00
+									f            string
 									in, arg, out string
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												various: avoid func compare

R=gri, r, bradfitz
CC=golang-dev
https://golang.org/cl/5371074

											
										
										
											2011-11-13 22:57:19 -05:00
+									{"Trim", "abba", "a", "bb"},
 									{"Trim", "abba", "ab", ""},
 									{"TrimLeft", "abba", "ab", ""},
 									{"TrimRight", "abba", "ab", ""},
 									{"TrimLeft", "abba", "a", "bba"},
 									{"TrimRight", "abba", "a", "abb"},
 									{"Trim", "<tag>", "<>", "tag"},
 									{"Trim", "* listitem", " *", "listitem"},
 									{"Trim", `"quote"`, `"`, "quote"},
 									{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
+									{"Trim", "\x80test\xff", "\xff", "test"},
 									{"Trim", " Ġ ", " ", "Ġ"},
 									{"Trim", " Ġİ0", "0 ", "Ġİ"},
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+									//empty string tests
-												various: avoid func compare

R=gri, r, bradfitz
CC=golang-dev
https://golang.org/cl/5371074

											
										
										
											2011-11-13 22:57:19 -05:00
+									{"Trim", "abba", "", "abba"},
 									{"Trim", "", "123", ""},
 									{"Trim", "", "", ""},
 									{"TrimLeft", "abba", "", "abba"},
 									{"TrimLeft", "", "123", ""},
 									{"TrimLeft", "", "", ""},
 									{"TrimRight", "abba", "", "abba"},
 									{"TrimRight", "", "123", ""},
 									{"TrimRight", "", "", ""},
 									{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
-												bytes, strings: add TrimPrefix and TrimSuffix

Everybody either gets confused and thinks this is
TrimLeft/TrimRight or does this by hand which gets
repetitive looking.

R=rsc, kevlar
CC=golang-dev
https://golang.org/cl/7239044

											
										
										
											2013-02-01 08:41:25 -08:00
+									{"TrimPrefix", "aabb", "a", "abb"},
 									{"TrimPrefix", "aabb", "b", "aabb"},
 									{"TrimSuffix", "aabb", "a", "aabb"},
 									{"TrimSuffix", "aabb", "b", "aab"},
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+								}
 								func TestTrim(t *testing.T) {
 									for _, tc := range trimTests {
-												various: avoid func compare

R=gri, r, bradfitz
CC=golang-dev
https://golang.org/cl/5371074

											
										
										
											2011-11-13 22:57:19 -05:00
+										name := tc.f
 										var f func(string, string) string
 										switch name {
 										case "Trim":
 											f = Trim
 										case "TrimLeft":
 											f = TrimLeft
 										case "TrimRight":
 											f = TrimRight
-												bytes, strings: add TrimPrefix and TrimSuffix

Everybody either gets confused and thinks this is
TrimLeft/TrimRight or does this by hand which gets
repetitive looking.

R=rsc, kevlar
CC=golang-dev
https://golang.org/cl/7239044

											
										
										
											2013-02-01 08:41:25 -08:00
+										case "TrimPrefix":
 											f = TrimPrefix
 										case "TrimSuffix":
 											f = TrimSuffix
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+										default:
-												strings: fix test output

R=rsc, gri
CC=golang-dev
https://golang.org/cl/5445044

											
										
										
											2011-11-28 09:51:03 -08:00
+											t.Errorf("Undefined trim function %s", name)
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+										}
-												bytes, strings: add TrimPrefix and TrimSuffix

Everybody either gets confused and thinks this is
TrimLeft/TrimRight or does this by hand which gets
repetitive looking.

R=rsc, kevlar
CC=golang-dev
https://golang.org/cl/7239044

											
										
										
											2013-02-01 08:41:25 -08:00
+										actual := f(tc.in, tc.arg)
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+										if actual != tc.out {
-												bytes, strings: add TrimPrefix and TrimSuffix

Everybody either gets confused and thinks this is
TrimLeft/TrimRight or does this by hand which gets
repetitive looking.

R=rsc, kevlar
CC=golang-dev
https://golang.org/cl/7239044

											
										
										
											2013-02-01 08:41:25 -08:00
+											t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+										}
 									}
 								}
-												strings: Add benchmark test for trim function

The strings.Trim function and variants allocate memory on the heap when creating a function to pass into TrimFunc.
Add a benchmark to document the behavior; an issue will be submitted to address this behavior in the compiler if possible.

Change-Id: I8b66721f077951f7e7b8cf3cf346fac27a9b68c0
Reviewed-on: https://go-review.googlesource.com/8200
Reviewed-by: Ian Lance Taylor <iant@golang.org>

											
										
										
											2015-03-25 21:08:04 -04:00
+								func BenchmarkTrim(b *testing.B) {
 									b.ReportAllocs()
 									for i := 0; i < b.N; i++ {
 										for _, tc := range trimTests {
 											name := tc.f
 											var f func(string, string) string
 											switch name {
 											case "Trim":
 												f = Trim
 											case "TrimLeft":
 												f = TrimLeft
 											case "TrimRight":
 												f = TrimRight
 											case "TrimPrefix":
 												f = TrimPrefix
 											case "TrimSuffix":
 												f = TrimSuffix
 											default:
 												b.Errorf("Undefined trim function %s", name)
 											}
 											actual := f(tc.in, tc.arg)
 											if actual != tc.out {
 												b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
 											}
 										}
 									}
 								}
-												bytes: port IndexFunc and LastIndexFunc from strings package

This CL basically applies the same changes as

	http://code.google.com/p/go/source/detail?r=5e0a29014e8e

but for bytes package.

R=r, rog
CC=golang-dev
https://golang.org/cl/1670052

											
										
										
											2010-07-23 12:34:35 -07:00
+								type predicate struct {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									f    func(rune) bool
-												bytes: port IndexFunc and LastIndexFunc from strings package

This CL basically applies the same changes as

	http://code.google.com/p/go/source/detail?r=5e0a29014e8e

but for bytes package.

R=r, rog
CC=golang-dev
https://golang.org/cl/1670052

											
										
										
											2010-07-23 12:34:35 -07:00
+									name string
 								}
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+								var isSpace = predicate{unicode.IsSpace, "IsSpace"}
 								var isDigit = predicate{unicode.IsDigit, "IsDigit"}
 								var isUpper = predicate{unicode.IsUpper, "IsUpper"}
 								var isValidRune = predicate{
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									func(r rune) bool {
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+										return r != utf8.RuneError
 									},
 									"IsValidRune",
 								}
 								func not(p predicate) predicate {
 									return predicate{
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+										func(r rune) bool {
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+											return !p.f(r)
 										},
 										"not " + p.name,
 									}
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var trimFuncTests = []struct {
 									f       predicate
 									in, out string
 								}{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{isSpace, space + " hello " + space, "hello"},
 									{isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
 									{isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
 									{not(isSpace), "hello" + space + "hello", space},
 									{not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
 									{isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
 									{not(isValidRune), "\xc0a\xc0", "a"},
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+								}
 								func TestTrimFunc(t *testing.T) {
 									for _, tc := range trimFuncTests {
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+										actual := TrimFunc(tc.in, tc.f.f)
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+										if actual != tc.out {
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+											t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
 										}
 									}
 								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var indexFuncTests = []struct {
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+									in          string
 									f           predicate
 									first, last int
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", isValidRune, -1, -1},
 									{"abc", isDigit, -1, -1},
 									{"0123", isDigit, 0, 3},
 									{"a1b", isDigit, 1, 1},
 									{space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
 									{"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
 									{"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
 									{"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
-												bytes: port IndexFunc and LastIndexFunc from strings package

This CL basically applies the same changes as

	http://code.google.com/p/go/source/detail?r=5e0a29014e8e

but for bytes package.

R=r, rog
CC=golang-dev
https://golang.org/cl/1670052

											
										
										
											2010-07-23 12:34:35 -07:00
+									// tests of invalid UTF-8
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"\x801", isDigit, 1, 1},
 									{"\x80abc", isDigit, -1, -1},
 									{"\xc0a\xc0", isValidRune, 1, 1},
 									{"\xc0a\xc0", not(isValidRune), 0, 2},
 									{"\xc0☺\xc0", not(isValidRune), 0, 4},
 									{"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
 									{"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
 									{"a\xe0\x80cd", not(isValidRune), 1, 2},
 									{"\x80\x80\x80\x80", not(isValidRune), 0, 3},
-												Add IndexFunc and LastIndexFunc.
Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044

											
										
										
											2010-06-14 14:54:48 -07:00
+								}
 								func TestIndexFunc(t *testing.T) {
 									for _, tc := range indexFuncTests {
 										first := IndexFunc(tc.in, tc.f.f)
 										if first != tc.first {
 											t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
 										}
 										last := LastIndexFunc(tc.in, tc.f.f)
 										if last != tc.last {
 											t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
-												strings: add IndexRune, Trim, TrimLeft, TrimRight, and the generic equivalents TrimFunc, TrimLeftFunc, TrimRightFunc

R=rsc, r
CC=golang-dev
https://golang.org/cl/799048

											
										
										
											2010-04-09 18:57:03 -07:00
+										}
 									}
 								}
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								func equal(m string, s1, s2 string, t *testing.T) bool {
 									if s1 == s2 {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										return true
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-												strings.Split: make the default to split all.
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.

R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051

											
										
										
											2011-06-28 09:43:14 +10:00
+									e1 := Split(s1, "")
 									e2 := Split(s2, "")
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									for i, c1 := range e1 {
-												strings: fix off-by-one error in test

Previously it would panic because of out-of-bound access
if s1 is longer than s2.

LGTM=iant
R=golang-codereviews, iant
CC=golang-codereviews
https://golang.org/cl/90110043

											
										
										
											2014-04-21 17:00:27 -07:00
+										if i >= len(e2) {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											break
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+										}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										r1, _ := utf8.DecodeRuneInString(c1)
 										r2, _ := utf8.DecodeRuneInString(e2[i])
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+										if r1 != r2 {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+											t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+										}
 									}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									return false
-												Add Upper, Lower, Trim methods to strings package.

APPROVED=rsc
DELTA=110  (110 added, 0 deleted, 0 changed)
OCL=29766
CL=29951

											
										
										
											2009-06-05 13:09:03 -07:00
+								}
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+								func TestCaseConsistency(t *testing.T) {
 									// Make a string of all the runes.
-												update tree for new default type rule

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/5448091

											
										
										
											2011-12-08 22:08:03 -05:00
+									numRunes := int(unicode.MaxRune + 1)
-												testing: shorten some tests.
These are the top runners.  More to come.
Also print two digits of timing info under -test.v.

R=rsc
CC=golang-dev
https://golang.org/cl/4317044

											
										
										
											2011-03-25 16:31:10 -07:00
+									if testing.Short() {
 										numRunes = 1000
 									}
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									a := make([]rune, numRunes)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									for i := range a {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+										a[i] = rune(i)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									s := string(a)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									// convert the cases.
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									upper := ToUpper(s)
 									lower := ToLower(s)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
 									// Consistency checks
-												testing: shorten some tests.
These are the top runners.  More to come.
Also print two digits of timing info under -test.v.

R=rsc
CC=golang-dev
https://golang.org/cl/4317044

											
										
										
											2011-03-25 16:31:10 -07:00
+									if n := utf8.RuneCountInString(upper); n != numRunes {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										t.Error("rune count wrong in upper:", n)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-												testing: shorten some tests.
These are the top runners.  More to come.
Also print two digits of timing info under -test.v.

R=rsc
CC=golang-dev
https://golang.org/cl/4317044

											
										
										
											2011-03-25 16:31:10 -07:00
+									if n := utf8.RuneCountInString(lower); n != numRunes {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										t.Error("rune count wrong in lower:", n)
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
 									if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										t.Error("ToUpper(upper) consistency fail")
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
 									if !equal("ToLower(lower)", ToLower(lower), lower, t) {
-												remove semis after statements in one-statement statement lists

R=rsc, r
http://go/go-review/1025029

											
										
										
											2009-11-09 12:07:39 -08:00
+										t.Error("ToLower(lower) consistency fail")
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									}
-												gofmt-ify strings, template

R=r
http://go/go-review/1018064

											
										
										
											2009-11-05 15:12:37 -08:00
+									/*
 										  These fail because of non-one-to-oneness of the data, such as multiple
 										  upper case 'I' mapping to 'i'.  We comment them out but keep them for
 										  interest.
 										  For instance: CAPITAL LETTER I WITH DOT ABOVE:
 											unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
 										if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
 											t.Error("ToUpper(lower) consistency fail");
 										}
 										if !equal("ToLower(upper)", ToLower(upper), lower, t) {
 											t.Error("ToLower(upper) consistency fail");
 										}
-												make ToUpper, ToLower etc. handle unicode properly.
Change their names too.

R=rsc
DELTA=206  (123 added, 2 deleted, 81 changed)
OCL=34170
CL=34194

											
										
										
											2009-09-01 11:06:28 -07:00
+									*/
 								}
-													An asked-for-in #go-nuts extension to quickly create a repeated
	copy of a string or a byte array.
        strings.Repeat("-", 50)
	bytes.Repeat(b, 99)

R=rsc
https://golang.org/cl/155063

											
										
										
											2009-11-16 12:40:01 -08:00
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var RepeatTests = []struct {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									in, out string
 									count   int
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", "", 0},
 									{"", "", 1},
 									{"", "", 2},
 									{"-", "", 0},
 									{"-", "-", 1},
 									{"-", "----------", 10},
 									{"abc ", "abc abc abc ", 3},
-													An asked-for-in #go-nuts extension to quickly create a repeated
	copy of a string or a byte array.
        strings.Repeat("-", 50)
	bytes.Repeat(b, 99)

R=rsc
https://golang.org/cl/155063

											
										
										
											2009-11-16 12:40:01 -08:00
+								}
 								func TestRepeat(t *testing.T) {
 									for _, tt := range RepeatTests {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+										a := Repeat(tt.in, tt.count)
-													An asked-for-in #go-nuts extension to quickly create a repeated
	copy of a string or a byte array.
        strings.Repeat("-", 50)
	bytes.Repeat(b, 99)

R=rsc
https://golang.org/cl/155063

											
										
										
											2009-11-16 12:40:01 -08:00
+										if !equal("Repeat(s)", a, tt.out, t) {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+											t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
 											continue
-													An asked-for-in #go-nuts extension to quickly create a repeated
	copy of a string or a byte array.
        strings.Repeat("-", 50)
	bytes.Repeat(b, 99)

R=rsc
https://golang.org/cl/155063

											
										
										
											2009-11-16 12:40:01 -08:00
+										}
 									}
 								}
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
-												strings, bytes: panic if Repeat overflows or if given a negative count

Panic if Repeat is given a negative count or
if the value of (len(*) * count) is detected
to overflow.
We panic because we cannot change the
signature of Repeat to return an error.

Fixes #16237

Change-Id: I9f5ba031a5b8533db0582d7a672ffb715143f3fb
Reviewed-on: https://go-review.googlesource.com/29954
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2016-09-28 01:54:38 -07:00
+								func repeat(s string, count int) (err error) {
 									defer func() {
 										if r := recover(); r != nil {
 											switch v := r.(type) {
 											case error:
 												err = v
 											default:
 												err = fmt.Errorf("%s", v)
 											}
 										}
 									}()
 									Repeat(s, count)
 									return
 								}
 								// See Issue golang.org/issue/16237
 								func TestRepeatCatchesOverflow(t *testing.T) {
 									tests := [...]struct {
 										s      string
 										count  int
 										errStr string
 									}{
 : {"--", -2147483647, "negative"},
 : {"", int(^uint(0) >> 1), ""},
 : {"-", 10, ""},
 : {"gopher", 0, ""},
 : {"-", -1, "negative"},
 : {"--", -102, "negative"},
 : {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
 									}
 									for i, tt := range tests {
 										err := repeat(tt.s, tt.count)
 										if tt.errStr == "" {
 											if err != nil {
 												t.Errorf("#%d panicked %v", i, err)
 											}
 											continue
 										}
 										if err == nil || !Contains(err.Error(), tt.errStr) {
 											t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
 										}
 									}
 								}
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+								func runesEqual(a, b []rune) bool {
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+									if len(a) != len(b) {
 										return false
 									}
 									for i, r := range a {
 										if r != b[i] {
 											return false
 										}
 									}
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									return true
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var RunesTests = []struct {
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									in    string
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									out   []rune
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+									lossy bool
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+									{"", []rune{}, false},
 									{" ", []rune{32}, false},
 									{"ABC", []rune{65, 66, 67}, false},
 									{"abc", []rune{97, 98, 99}, false},
 									{"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
 									{"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
 									{"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+								}
 								func TestRunes(t *testing.T) {
 									for _, tt := range RunesTests {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+										a := []rune(tt.in)
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+										if !runesEqual(a, tt.out) {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+											t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out)
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+											continue
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+										}
 										if !tt.lossy {
 											// can only test reassembly if we didn't lose information
-) Change default gofmt default settings for
   parsing and printing to new syntax.

   Use -oldparser to parse the old syntax,
   use -oldprinter to print the old syntax.

2) Change default gofmt formatting settings
   to use tabs for indentation only and to use
   spaces for alignment. This will make the code
   alignment insensitive to an editor's tabwidth.

   Use -spaces=false to use tabs for alignment.

3) Manually changed src/exp/parser/parser_test.go
   so that it doesn't try to parse the parser's
   source files using the old syntax (they have
   new syntax now).

4) gofmt -w src misc test/bench

4th set of files.

R=rsc
CC=golang-dev
https://golang.org/cl/180049

											
										
										
											2009-12-15 15:40:16 -08:00
+											s := string(a)
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+											if s != tt.in {
-												bytes, strings: use rune

Various rune-based APIs change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044

											
										
										
											2011-10-25 22:22:09 -07:00
+												t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in)
-												Runes: turn string into []int
Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170

											
										
										
											2009-12-02 20:47:38 -08:00
+											}
 										}
 									}
 								}
-												strings: add ReadRune to Reader

R=rsc
CC=golang-dev
https://golang.org/cl/940041

											
										
										
											2010-04-20 22:18:26 -07:00
-												strings: implement UnreadByte, UnreadRune

Added corresponding tests.

R=rsc
CC=golang-dev
https://golang.org/cl/4560045

											
										
										
											2011-05-26 11:02:07 -07:00
+								func TestReadByte(t *testing.T) {
 									testStrings := []string{"", abcd, faces, commas}
 									for _, s := range testStrings {
 										reader := NewReader(s)
 										if e := reader.UnreadByte(); e == nil {
 											t.Errorf("Unreading %q at beginning: expected error", s)
 										}
 										var res bytes.Buffer
 										for {
 											b, e := reader.ReadByte()
-												src/pkg/[n-z]*: gofix -r error -force=error

R=golang-dev, bsiegert, iant
CC=golang-dev
https://golang.org/cl/5294074

											
										
										
											2011-11-01 22:05:34 -04:00
+											if e == io.EOF {
-												strings: implement UnreadByte, UnreadRune

Added corresponding tests.

R=rsc
CC=golang-dev
https://golang.org/cl/4560045

											
										
										
											2011-05-26 11:02:07 -07:00
+												break
 											}
 											if e != nil {
 												t.Errorf("Reading %q: %s", s, e)
 												break
 											}
 											res.WriteByte(b)
 											// unread and read again
 											e = reader.UnreadByte()
 											if e != nil {
 												t.Errorf("Unreading %q: %s", s, e)
 												break
 											}
 											b1, e := reader.ReadByte()
 											if e != nil {
 												t.Errorf("Reading %q after unreading: %s", s, e)
 												break
 											}
 											if b1 != b {
 												t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1)
 												break
 											}
 										}
 										if res.String() != s {
 											t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String())
 										}
 									}
 								}
-												strings: add ReadRune to Reader

R=rsc
CC=golang-dev
https://golang.org/cl/940041

											
										
										
											2010-04-20 22:18:26 -07:00
+								func TestReadRune(t *testing.T) {
 									testStrings := []string{"", abcd, faces, commas}
 									for _, s := range testStrings {
 										reader := NewReader(s)
-												strings: implement UnreadByte, UnreadRune

Added corresponding tests.

R=rsc
CC=golang-dev
https://golang.org/cl/4560045

											
										
										
											2011-05-26 11:02:07 -07:00
+										if e := reader.UnreadRune(); e == nil {
 											t.Errorf("Unreading %q at beginning: expected error", s)
 										}
-												strings: add ReadRune to Reader

R=rsc
CC=golang-dev
https://golang.org/cl/940041

											
										
										
											2010-04-20 22:18:26 -07:00
+										res := ""
 										for {
-												strings: implement UnreadByte, UnreadRune

Added corresponding tests.

R=rsc
CC=golang-dev
https://golang.org/cl/4560045

											
										
										
											2011-05-26 11:02:07 -07:00
+											r, z, e := reader.ReadRune()
-												src/pkg/[n-z]*: gofix -r error -force=error

R=golang-dev, bsiegert, iant
CC=golang-dev
https://golang.org/cl/5294074

											
										
										
											2011-11-01 22:05:34 -04:00
+											if e == io.EOF {
-												strings: add ReadRune to Reader

R=rsc
CC=golang-dev
https://golang.org/cl/940041

											
										
										
											2010-04-20 22:18:26 -07:00
+												break
 											}
 											if e != nil {
 												t.Errorf("Reading %q: %s", s, e)
 												break
 											}
 											res += string(r)
-												strings: implement UnreadByte, UnreadRune

Added corresponding tests.

R=rsc
CC=golang-dev
https://golang.org/cl/4560045

											
										
										
											2011-05-26 11:02:07 -07:00
+											// unread and read again
 											e = reader.UnreadRune()
 											if e != nil {
 												t.Errorf("Unreading %q: %s", s, e)
 												break
 											}
 											r1, z1, e := reader.ReadRune()
 											if e != nil {
 												t.Errorf("Reading %q after unreading: %s", s, e)
 												break
 											}
 											if r1 != r {
 												t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1)
 												break
 											}
 											if z1 != z {
 												t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1)
 												break
 											}
-												strings: add ReadRune to Reader

R=rsc
CC=golang-dev
https://golang.org/cl/940041

											
										
										
											2010-04-20 22:18:26 -07:00
+										}
 										if res != s {
 											t.Errorf("Reader(%q).ReadRune() produced %q", s, res)
 										}
 									}
 								}
-												bytes, strings: add Replace

This is the Replace I suggested in the review of CL 1114041.
It's true that we already have

	regexp.MustCompile(regexp.QuoteMeta(old)).ReplaceAll(s, new)

but because this Replace is doing a simpler job it is
simpler to call and inherently more efficient.

I will add the bytes implementation and tests to the
CL after the strings one has been reviewed.

R=r, cw
CC=golang-dev
https://golang.org/cl/1731048

											
										
										
											2010-06-30 18:03:09 -07:00
-												strings, bytes: fix Reader.UnreadRune

UnreadRune should return an error if previous operation is not
ReadRune.

Fixes #7579.

LGTM=bradfitz
R=golang-codereviews, bradfitz
CC=golang-codereviews
https://golang.org/cl/77580046

											
										
										
											2014-03-19 09:00:58 -07:00
+								var UnreadRuneErrorTests = []struct {
 									name string
 									f    func(*Reader)
 								}{
-												net/http, strings, bytes: fix http race, revert part of Reader behavior change

I fixed this data race regression in two ways: in net/http itself, and also
partially reverting the change from https://golang.org/cl/77580046 .
Previously a Read from a strings.Reader or bytes.Reader returning 0 bytes
would not be a memory write. After 77580046 it was. This reverts that back
in case others depended on that. Also adds tests.

Fixes #7856

LGTM=ruiu, iant
R=iant, ruiu
CC=golang-codereviews, gri
https://golang.org/cl/94740044

											
										
										
											2014-04-25 06:44:51 -07:00
+									{"Read", func(r *Reader) { r.Read([]byte{0}) }},
-												strings, bytes: fix Reader.UnreadRune

UnreadRune should return an error if previous operation is not
ReadRune.

Fixes #7579.

LGTM=bradfitz
R=golang-codereviews, bradfitz
CC=golang-codereviews
https://golang.org/cl/77580046

											
										
										
											2014-03-19 09:00:58 -07:00
+									{"ReadByte", func(r *Reader) { r.ReadByte() }},
 									{"UnreadRune", func(r *Reader) { r.UnreadRune() }},
-												all: use SeekStart, SeekCurrent, SeekEnd

CL/19862 (f79b50b8d5bc159561c1dcf7c17e2a0db96a9a11) recently introduced the constants
SeekStart, SeekCurrent, and SeekEnd to the io package. We should use these constants
consistently throughout the code base.

Updates #15269

Change-Id: If7fcaca7676e4a51f588528f5ced28220d9639a2
Reviewed-on: https://go-review.googlesource.com/22097
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2016-04-05 11:22:53 -07:00
+									{"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
-												strings, bytes: fix Reader.UnreadRune

UnreadRune should return an error if previous operation is not
ReadRune.

Fixes #7579.

LGTM=bradfitz
R=golang-codereviews, bradfitz
CC=golang-codereviews
https://golang.org/cl/77580046

											
										
										
											2014-03-19 09:00:58 -07:00
+									{"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
 								}
 								func TestUnreadRuneError(t *testing.T) {
 									for _, tt := range UnreadRuneErrorTests {
 										reader := NewReader("0123456789")
 										if _, _, err := reader.ReadRune(); err != nil {
 											// should not happen
 											t.Fatal(err)
 										}
 										tt.f(reader)
 										err := reader.UnreadRune()
 										if err == nil {
 											t.Errorf("Unreading after %s: expected error", tt.name)
 										}
 									}
 								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var ReplaceTests = []struct {
-												bytes, strings: add Replace

This is the Replace I suggested in the review of CL 1114041.
It's true that we already have

	regexp.MustCompile(regexp.QuoteMeta(old)).ReplaceAll(s, new)

but because this Replace is doing a simpler job it is
simpler to call and inherently more efficient.

I will add the bytes implementation and tests to the
CL after the strings one has been reviewed.

R=r, cw
CC=golang-dev
https://golang.org/cl/1731048

											
										
										
											2010-06-30 18:03:09 -07:00
+									in       string
 									old, new string
 									n        int
 									out      string
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"hello", "l", "L", 0, "hello"},
 									{"hello", "l", "L", -1, "heLLo"},
 									{"hello", "x", "X", -1, "hello"},
 									{"", "x", "X", -1, ""},
 									{"radar", "r", "<r>", -1, "<r>ada<r>"},
 									{"", "", "<>", -1, "<>"},
 									{"banana", "a", "<>", -1, "b<>n<>n<>"},
 									{"banana", "a", "<>", 1, "b<>nana"},
 									{"banana", "a", "<>", 1000, "b<>n<>n<>"},
 									{"banana", "an", "<>", -1, "b<><>a"},
 									{"banana", "ana", "<>", -1, "b<>na"},
 									{"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
 									{"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
 									{"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
 									{"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
 									{"banana", "", "<>", 1, "<>banana"},
 									{"banana", "a", "a", -1, "banana"},
 									{"banana", "a", "a", 1, "banana"},
 									{"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
-												bytes, strings: add Replace

This is the Replace I suggested in the review of CL 1114041.
It's true that we already have

	regexp.MustCompile(regexp.QuoteMeta(old)).ReplaceAll(s, new)

but because this Replace is doing a simpler job it is
simpler to call and inherently more efficient.

I will add the bytes implementation and tests to the
CL after the strings one has been reviewed.

R=r, cw
CC=golang-dev
https://golang.org/cl/1731048

											
										
										
											2010-06-30 18:03:09 -07:00
+								}
 								func TestReplace(t *testing.T) {
 									for _, tt := range ReplaceTests {
 										if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out {
 											t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
 										}
 									}
 								}
-												strings: add Title
strings.ToTitle converts all characters to title case, which for consistency with the
other To* functions it should continue to do.  This CL adds string.Title, which
does a proper title-casing of the string.
A similar function for package bytes will follow once this is settled.
Fixes #933.

R=rsc
CC=golang-dev
https://golang.org/cl/1869042

											
										
										
											2010-07-20 00:03:59 -07:00
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var TitleTests = []struct {
-												strings: add Title
strings.ToTitle converts all characters to title case, which for consistency with the
other To* functions it should continue to do.  This CL adds string.Title, which
does a proper title-casing of the string.
A similar function for package bytes will follow once this is settled.
Fixes #933.

R=rsc
CC=golang-dev
https://golang.org/cl/1869042

											
										
										
											2010-07-20 00:03:59 -07:00
+									in, out string
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												gofmt -s -w src misc

R=r, rsc
CC=golang-dev
https://golang.org/cl/2662041

											
										
										
											2010-10-22 10:06:33 -07:00
+									{"", ""},
 									{"a", "A"},
 									{" aaa aaa aaa ", " Aaa Aaa Aaa "},
 									{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
 									{"123a456", "123a456"},
 									{"double-blind", "Double-Blind"},
 									{"ÿøû", "Ÿøû"},
-												bytes, strings: improve Title test coverage by adding cases with underscore and unicode line separator

R=golang-codereviews, gobot, r
CC=golang-codereviews
https://golang.org/cl/42310045

											
										
										
											2013-12-20 23:19:32 -08:00
+									{"with_underscore", "With_underscore"},
 									{"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
-												strings: add Title
strings.ToTitle converts all characters to title case, which for consistency with the
other To* functions it should continue to do.  This CL adds string.Title, which
does a proper title-casing of the string.
A similar function for package bytes will follow once this is settled.
Fixes #933.

R=rsc
CC=golang-dev
https://golang.org/cl/1869042

											
										
										
											2010-07-20 00:03:59 -07:00
+								}
 								func TestTitle(t *testing.T) {
 									for _, tt := range TitleTests {
 										if s := Title(tt.in); s != tt.out {
 											t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
 										}
 									}
 								}
-												strings: Contains

Tiny helper to avoid strings.Index(s, sub) != -1

R=rsc, r2, r
CC=golang-dev
https://golang.org/cl/2265044

											
										
										
											2010-11-01 14:32:48 -07:00
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var ContainsTests = []struct {
-												strings: Contains

Tiny helper to avoid strings.Index(s, sub) != -1

R=rsc, r2, r
CC=golang-dev
https://golang.org/cl/2265044

											
										
										
											2010-11-01 14:32:48 -07:00
+									str, substr string
 									expected    bool
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}{
-												strings: Contains

Tiny helper to avoid strings.Index(s, sub) != -1

R=rsc, r2, r
CC=golang-dev
https://golang.org/cl/2265044

											
										
										
											2010-11-01 14:32:48 -07:00
+									{"abc", "bc", true},
 									{"abc", "bcd", false},
 									{"abc", "", true},
 									{"", "a", false},
-												strings: fix Contains on amd64

The 17-31 byte code is broken.  Disabled it.

Added a bunch of tests to at least cover the cases
in indexShortStr.  I'll channel Brad and wonder why
this CL ever got in without any tests.

Fixes #15679

Change-Id: I84a7b283a74107db865b9586c955dcf5f2d60161
Reviewed-on: https://go-review.googlesource.com/23106
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2016-05-14 17:33:23 -07:00
 									// cases to cover code in runtime/asm_amd64.s:indexShortStr
 									// 2-byte needle
 									{"xxxxxx", "01", false},
 									{"01xxxx", "01", true},
 									{"xx01xx", "01", true},
 									{"xxxx01", "01", true},
 									{"01xxxxx"[1:], "01", false},
 									{"xxxxx01"[:6], "01", false},
 									// 3-byte needle
 									{"xxxxxxx", "012", false},
 									{"012xxxx", "012", true},
 									{"xx012xx", "012", true},
 									{"xxxx012", "012", true},
 									{"012xxxxx"[1:], "012", false},
 									{"xxxxx012"[:7], "012", false},
 									// 4-byte needle
 									{"xxxxxxxx", "0123", false},
 									{"0123xxxx", "0123", true},
 									{"xx0123xx", "0123", true},
 									{"xxxx0123", "0123", true},
 									{"0123xxxxx"[1:], "0123", false},
 									{"xxxxx0123"[:8], "0123", false},
 									// 5-7-byte needle
 									{"xxxxxxxxx", "01234", false},
 									{"01234xxxx", "01234", true},
 									{"xx01234xx", "01234", true},
 									{"xxxx01234", "01234", true},
 									{"01234xxxxx"[1:], "01234", false},
 									{"xxxxx01234"[:9], "01234", false},
 									// 8-byte needle
 									{"xxxxxxxxxxxx", "01234567", false},
 									{"01234567xxxx", "01234567", true},
 									{"xx01234567xx", "01234567", true},
 									{"xxxx01234567", "01234567", true},
 									{"01234567xxxxx"[1:], "01234567", false},
 									{"xxxxx01234567"[:12], "01234567", false},
 									// 9-15-byte needle
 									{"xxxxxxxxxxxxx", "012345678", false},
 									{"012345678xxxx", "012345678", true},
 									{"xx012345678xx", "012345678", true},
 									{"xxxx012345678", "012345678", true},
 									{"012345678xxxxx"[1:], "012345678", false},
 									{"xxxxx012345678"[:13], "012345678", false},
 									// 16-byte needle
 									{"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
 									{"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
 									{"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
 									{"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
 									{"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
 									{"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
 									// 17-31-byte needle
 									{"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
 									{"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
 									{"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
 									{"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
 									{"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
 									{"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},
 									// partial match cases
 									{"xx01x", "012", false},                             // 3
 									{"xx0123x", "01234", false},                         // 5-7
 									{"xx01234567x", "012345678", false},                 // 9-15
 									{"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
-												strings: Contains

Tiny helper to avoid strings.Index(s, sub) != -1

R=rsc, r2, r
CC=golang-dev
https://golang.org/cl/2265044

											
										
										
											2010-11-01 14:32:48 -07:00
+								}
 								func TestContains(t *testing.T) {
 									for _, ct := range ContainsTests {
 										if Contains(ct.str, ct.substr) != ct.expected {
 											t.Errorf("Contains(%s, %s) = %v, want %v",
 												ct.str, ct.substr, !ct.expected, ct.expected)
 										}
 									}
 								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
-												strings: Add ContainsAny and ContainsRune to correspond to IndexAny etc.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5430046

											
										
										
											2011-11-23 20:20:14 -08:00
+								var ContainsAnyTests = []struct {
 									str, substr string
 									expected    bool
 								}{
 									{"", "", false},
 									{"", "a", false},
 									{"", "abc", false},
 									{"a", "", false},
 									{"a", "a", true},
 									{"aaa", "a", true},
 									{"abc", "xyz", false},
 									{"abc", "xcz", true},
 									{"a☺b☻c☹d", "uvw☻xyz", true},
 									{"aRegExp*", ".(|)*+?^$[]", true},
 									{dots + dots + dots, " ", false},
 								}
 								func TestContainsAny(t *testing.T) {
 									for _, ct := range ContainsAnyTests {
 										if ContainsAny(ct.str, ct.substr) != ct.expected {
 											t.Errorf("ContainsAny(%s, %s) = %v, want %v",
 												ct.str, ct.substr, !ct.expected, ct.expected)
 										}
 									}
 								}
 								var ContainsRuneTests = []struct {
 									str      string
 									r        rune
 									expected bool
 								}{
 									{"", 'a', false},
 									{"a", 'a', true},
 									{"aaa", 'a', true},
 									{"abc", 'y', false},
 									{"abc", 'c', true},
 									{"a☺b☻c☹d", 'x', false},
 									{"a☺b☻c☹d", '☻', true},
 									{"aRegExp*", '*', true},
 								}
 								func TestContainsRune(t *testing.T) {
 									for _, ct := range ContainsRuneTests {
 										if ContainsRune(ct.str, ct.r) != ct.expected {
-												all: fix a few more printf arg bugs found by go vet

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7413045

											
										
										
											2013-02-28 11:33:08 -08:00
+											t.Errorf("ContainsRune(%q, %q) = %v, want %v",
-												strings: Add ContainsAny and ContainsRune to correspond to IndexAny etc.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5430046

											
										
										
											2011-11-23 20:20:14 -08:00
+												ct.str, ct.r, !ct.expected, ct.expected)
 										}
 									}
 								}
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								var EqualFoldTests = []struct {
 									s, t string
 									out  bool
 								}{
 									{"abc", "abc", true},
 									{"ABcd", "ABcd", true},
 									{"123abc", "123ABC", true},
 									{"αβδ", "ΑΒΔ", true},
 									{"abc", "xyz", false},
 									{"abc", "XYZ", false},
 									{"abcdefghijk", "abcdefghijX", false},
 									{"abcdefghijk", "abcdefghij\u212A", true},
 									{"abcdefghijK", "abcdefghij\u212A", true},
 									{"abcdefghijkz", "abcdefghij\u212Ay", false},
 									{"abcdefghijKz", "abcdefghij\u212Ay", false},
-												bytes, strings: improve EqualFold fast version for ASCII

The existing implementation only considers the special ASCII
case when the lower character is an upper case letter. This
means that most ASCII comparisons use unicode.SimpleFold even
when it is not necessary.

benchmark                old ns/op     new ns/op     delta
BenchmarkEqualFold-8     450           390           -13.33%

Change-Id: I735ca3c30fc0145c186d2a54f31fd39caab2c3fa
Reviewed-on: https://go-review.googlesource.com/110018
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-04-29 00:15:03 -04:00
+									{"1", "2", false},
 									{"utf-8", "US-ASCII", false},
-												strings: add EqualFold

Case-insensitive strcmp without using ToLower.
(Using ToLower is not always correct, and it allocates.)

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5143044

											
										
										
											2011-09-26 18:32:51 -04:00
+								}
 								func TestEqualFold(t *testing.T) {
 									for _, tt := range EqualFoldTests {
 										if out := EqualFold(tt.s, tt.t); out != tt.out {
 											t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
 										}
 										if out := EqualFold(tt.t, tt.s); out != tt.out {
 											t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
 										}
 									}
 								}
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
-												bytes, strings: improve EqualFold fast version for ASCII

The existing implementation only considers the special ASCII
case when the lower character is an upper case letter. This
means that most ASCII comparisons use unicode.SimpleFold even
when it is not necessary.

benchmark                old ns/op     new ns/op     delta
BenchmarkEqualFold-8     450           390           -13.33%

Change-Id: I735ca3c30fc0145c186d2a54f31fd39caab2c3fa
Reviewed-on: https://go-review.googlesource.com/110018
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-04-29 00:15:03 -04:00
+								func BenchmarkEqualFold(b *testing.B) {
 									for i := 0; i < b.N; i++ {
 										for _, tt := range EqualFoldTests {
 											if out := EqualFold(tt.s, tt.t); out != tt.out {
 												b.Fatal("wrong result")
 											}
 										}
 									}
 								}
-												strings: add test for Count

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12541050

											
										
										
											2013-08-09 12:51:21 -07:00
+								var CountTests = []struct {
 									s, sep string
 									num    int
 								}{
 									{"", "", 1},
 									{"", "notempty", 0},
 									{"notempty", "", 9},
 									{"smaller", "not smaller", 0},
 									{"12345678987654321", "6", 2},
 									{"611161116", "6", 3},
 									{"notequal", "NotEqual", 0},
 									{"equal", "equal", 1},
 									{"abc1231231123q", "123", 3},
 									{"11111", "11", 2},
 								}
 								func TestCount(t *testing.T) {
 									for _, tt := range CountTests {
 										if num := Count(tt.s, tt.sep); num != tt.num {
 											t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, num, tt.num)
 										}
 									}
 								}
-												strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically
by using Rabin-Karp algorithm, which provides the needed complexity
unless exceptional collisions occur, without memory allocation.

benchmark                 old ns/op    new ns/op    delta
BenchmarkIndexHard1         6532331      4045886  -38.06%
BenchmarkIndexHard2         8178173      4038975  -50.61%
BenchmarkIndexHard3         6973687      4042591  -42.03%
BenchmarkCountHard1         6270864      4071090  -35.08%
BenchmarkCountHard2         7838039      4072853  -48.04%
BenchmarkCountHard3         6697828      4071964  -39.20%
BenchmarkIndexTorture       2730546        28934  -98.94%
BenchmarkCountTorture       2729622        29064  -98.94%

Fixes #4600.

R=rsc, donovanhide, remyoudompheng
CC=golang-dev
https://golang.org/cl/7314095

											
										
										
											2013-02-17 13:07:17 +01:00
+								func makeBenchInputHard() string {
 									tokens := [...]string{
 										"<a>", "<p>", "<b>", "<strong>",
 										"</a>", "</p>", "</b>", "</strong>",
 										"hello", "world",
 									}
 									x := make([]byte, 0, 1<<20)
-												strings: avoid pointless slice growth in makeBenchInputHard

LGTM=ruiu
R=golang-codereviews, ruiu
CC=golang-codereviews
https://golang.org/cl/108150043

											
										
										
											2014-06-26 13:00:47 -07:00
+									for {
-												strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically
by using Rabin-Karp algorithm, which provides the needed complexity
unless exceptional collisions occur, without memory allocation.

benchmark                 old ns/op    new ns/op    delta
BenchmarkIndexHard1         6532331      4045886  -38.06%
BenchmarkIndexHard2         8178173      4038975  -50.61%
BenchmarkIndexHard3         6973687      4042591  -42.03%
BenchmarkCountHard1         6270864      4071090  -35.08%
BenchmarkCountHard2         7838039      4072853  -48.04%
BenchmarkCountHard3         6697828      4071964  -39.20%
BenchmarkIndexTorture       2730546        28934  -98.94%
BenchmarkCountTorture       2729622        29064  -98.94%

Fixes #4600.

R=rsc, donovanhide, remyoudompheng
CC=golang-dev
https://golang.org/cl/7314095

											
										
										
											2013-02-17 13:07:17 +01:00
+										i := rand.Intn(len(tokens))
-												strings: avoid pointless slice growth in makeBenchInputHard

LGTM=ruiu
R=golang-codereviews, ruiu
CC=golang-codereviews
https://golang.org/cl/108150043

											
										
										
											2014-06-26 13:00:47 -07:00
+										if len(x)+len(tokens[i]) >= 1<<20 {
 											break
 										}
-												strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically
by using Rabin-Karp algorithm, which provides the needed complexity
unless exceptional collisions occur, without memory allocation.

benchmark                 old ns/op    new ns/op    delta
BenchmarkIndexHard1         6532331      4045886  -38.06%
BenchmarkIndexHard2         8178173      4038975  -50.61%
BenchmarkIndexHard3         6973687      4042591  -42.03%
BenchmarkCountHard1         6270864      4071090  -35.08%
BenchmarkCountHard2         7838039      4072853  -48.04%
BenchmarkCountHard3         6697828      4071964  -39.20%
BenchmarkIndexTorture       2730546        28934  -98.94%
BenchmarkCountTorture       2729622        29064  -98.94%

Fixes #4600.

R=rsc, donovanhide, remyoudompheng
CC=golang-dev
https://golang.org/cl/7314095

											
										
										
											2013-02-17 13:07:17 +01:00
+										x = append(x, tokens[i]...)
 									}
 									return string(x)
 								}
 								var benchInputHard = makeBenchInputHard()
 								func benchmarkIndexHard(b *testing.B, sep string) {
 									for i := 0; i < b.N; i++ {
 										Index(benchInputHard, sep)
 									}
 								}
-												strings: use Rabin-Karp algorithm for LastIndex.

benchmark                  old ns/op     new ns/op     delta
BenchmarkSingleMatch       49443         52275         +5.73%
BenchmarkIndex             28.8          27.4          -4.86%
BenchmarkLastIndex         14.5          14.0          -3.45%
BenchmarkLastIndexHard1    3982782       2309200       -42.02%
BenchmarkLastIndexHard2    3985562       2287715       -42.60%
BenchmarkLastIndexHard3    3555259       2282866       -35.79%

LGTM=josharian, nigeltao
R=golang-codereviews, ality, josharian, bradfitz, dave, nigeltao, gobot, nightlyone
CC=golang-codereviews
https://golang.org/cl/102560043

											
										
										
											2014-09-01 17:47:57 +10:00
+								func benchmarkLastIndexHard(b *testing.B, sep string) {
 									for i := 0; i < b.N; i++ {
 										LastIndex(benchInputHard, sep)
 									}
 								}
-												strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically
by using Rabin-Karp algorithm, which provides the needed complexity
unless exceptional collisions occur, without memory allocation.

benchmark                 old ns/op    new ns/op    delta
BenchmarkIndexHard1         6532331      4045886  -38.06%
BenchmarkIndexHard2         8178173      4038975  -50.61%
BenchmarkIndexHard3         6973687      4042591  -42.03%
BenchmarkCountHard1         6270864      4071090  -35.08%
BenchmarkCountHard2         7838039      4072853  -48.04%
BenchmarkCountHard3         6697828      4071964  -39.20%
BenchmarkIndexTorture       2730546        28934  -98.94%
BenchmarkCountTorture       2729622        29064  -98.94%

Fixes #4600.

R=rsc, donovanhide, remyoudompheng
CC=golang-dev
https://golang.org/cl/7314095

											
										
										
											2013-02-17 13:07:17 +01:00
+								func benchmarkCountHard(b *testing.B, sep string) {
 									for i := 0; i < b.N; i++ {
 										Count(benchInputHard, sep)
 									}
 								}
 								func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
 								func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
 								func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
-												strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2016-04-28 17:39:55 +03:00
+								func BenchmarkIndexHard4(b *testing.B) {
 									benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
 								}
-												strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically
by using Rabin-Karp algorithm, which provides the needed complexity
unless exceptional collisions occur, without memory allocation.

benchmark                 old ns/op    new ns/op    delta
BenchmarkIndexHard1         6532331      4045886  -38.06%
BenchmarkIndexHard2         8178173      4038975  -50.61%
BenchmarkIndexHard3         6973687      4042591  -42.03%
BenchmarkCountHard1         6270864      4071090  -35.08%
BenchmarkCountHard2         7838039      4072853  -48.04%
BenchmarkCountHard3         6697828      4071964  -39.20%
BenchmarkIndexTorture       2730546        28934  -98.94%
BenchmarkCountTorture       2729622        29064  -98.94%

Fixes #4600.

R=rsc, donovanhide, remyoudompheng
CC=golang-dev
https://golang.org/cl/7314095

											
										
										
											2013-02-17 13:07:17 +01:00
-												strings: use Rabin-Karp algorithm for LastIndex.

benchmark                  old ns/op     new ns/op     delta
BenchmarkSingleMatch       49443         52275         +5.73%
BenchmarkIndex             28.8          27.4          -4.86%
BenchmarkLastIndex         14.5          14.0          -3.45%
BenchmarkLastIndexHard1    3982782       2309200       -42.02%
BenchmarkLastIndexHard2    3985562       2287715       -42.60%
BenchmarkLastIndexHard3    3555259       2282866       -35.79%

LGTM=josharian, nigeltao
R=golang-codereviews, ality, josharian, bradfitz, dave, nigeltao, gobot, nightlyone
CC=golang-codereviews
https://golang.org/cl/102560043

											
										
										
											2014-09-01 17:47:57 +10:00
+								func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
 								func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
 								func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }
-												strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically
by using Rabin-Karp algorithm, which provides the needed complexity
unless exceptional collisions occur, without memory allocation.

benchmark                 old ns/op    new ns/op    delta
BenchmarkIndexHard1         6532331      4045886  -38.06%
BenchmarkIndexHard2         8178173      4038975  -50.61%
BenchmarkIndexHard3         6973687      4042591  -42.03%
BenchmarkCountHard1         6270864      4071090  -35.08%
BenchmarkCountHard2         7838039      4072853  -48.04%
BenchmarkCountHard3         6697828      4071964  -39.20%
BenchmarkIndexTorture       2730546        28934  -98.94%
BenchmarkCountTorture       2729622        29064  -98.94%

Fixes #4600.

R=rsc, donovanhide, remyoudompheng
CC=golang-dev
https://golang.org/cl/7314095

											
										
										
											2013-02-17 13:07:17 +01:00
+								func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
 								func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
 								func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
 								var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
 								var benchNeedleTorture = Repeat("ABC", 1<<10+1)
 								func BenchmarkIndexTorture(b *testing.B) {
 									for i := 0; i < b.N; i++ {
 										Index(benchInputTorture, benchNeedleTorture)
 									}
 								}
 								func BenchmarkCountTorture(b *testing.B) {
 									for i := 0; i < b.N; i++ {
 										Count(benchInputTorture, benchNeedleTorture)
 									}
 								}
-												strings: faster Count, Index

Slightly better benchmarks for when string and separator are equivalent and also less branching in inner loops.
benchmark                        old ns/op    new ns/op    delta
BenchmarkGenericNoMatch               3430         3442   +0.35%
BenchmarkGenericMatch1               23590        22855   -3.12%
BenchmarkGenericMatch2              108031       105025   -2.78%
BenchmarkSingleMaxSkipping            2969         2704   -8.93%
BenchmarkSingleLongSuffixFail         2826         2572   -8.99%
BenchmarkSingleMatch                205268       197832   -3.62%
BenchmarkByteByteNoMatch               987          921   -6.69%
BenchmarkByteByteMatch                2014         1749  -13.16%
BenchmarkByteStringMatch              3083         3050   -1.07%
BenchmarkHTMLEscapeNew                 922          915   -0.76%
BenchmarkHTMLEscapeOld                1654         1570   -5.08%
BenchmarkByteByteReplaces            11897        11556   -2.87%
BenchmarkByteByteMap                  4485         4255   -5.13%
BenchmarkIndexRune                     174          121  -30.46%
BenchmarkIndexRuneFastPath              41           41   -0.24%
BenchmarkIndex                          45           44   -0.22%
BenchmarkMapNoChanges                  433          431   -0.46%
BenchmarkIndexHard1                4015336      3316490  -17.40%
BenchmarkIndexHard2                3976254      3395627  -14.60%
BenchmarkIndexHard3                3973158      3378329  -14.97%
BenchmarkCountHard1                4403549      3448512  -21.69%
BenchmarkCountHard2                4387437      3413059  -22.21%
BenchmarkCountHard3                4403891      3382661  -23.19%
BenchmarkIndexTorture                28354        25864   -8.78%
BenchmarkCountTorture                29625        27463   -7.30%
BenchmarkFields                   38752040     39169840   +1.08%
BenchmarkFieldsFunc               38797765     38888060   +0.23%

benchmark                         old MB/s     new MB/s  speedup
BenchmarkSingleMaxSkipping         3367.07      3697.62    1.10x
BenchmarkSingleLongSuffixFail       354.51       389.47    1.10x
BenchmarkSingleMatch                 73.07        75.82    1.04x
BenchmarkFields                      27.06        26.77    0.99x
BenchmarkFieldsFunc                  27.03        26.96    1.00x

R=dave, fullung, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7350045

											
										
										
											2013-02-19 10:36:15 -05:00
+								func BenchmarkCountTortureOverlapping(b *testing.B) {
 									A := Repeat("ABC", 1<<20)
 									B := Repeat("ABC", 1<<10)
 									for i := 0; i < b.N; i++ {
 										Count(A, B)
 									}
 								}
-												strings: optimize Count for amd64

Move optimized Count implementation from bytes to runtime. Use in
both bytes and strings packages.
Add CountByte benchmark to strings.

Strings benchmarks:
name                       old time/op    new time/op    delta
CountHard1-4                 226µs ± 1%      226µs ± 2%      ~     (p=0.247 n=10+10)
CountHard2-4                 316µs ± 1%      315µs ± 0%      ~     (p=0.133 n=9+10)
CountHard3-4                 919µs ± 1%      920µs ± 1%      ~     (p=0.968 n=10+9)
CountTorture-4              15.4µs ± 1%     15.7µs ± 1%    +2.47%  (p=0.000 n=10+9)
CountTortureOverlapping-4   9.60ms ± 0%     9.65ms ± 1%      ~     (p=0.247 n=10+10)
CountByte/10-4              26.3ns ± 1%     10.9ns ± 1%   -58.71%  (p=0.000 n=9+9)
CountByte/32-4              42.7ns ± 0%     14.2ns ± 0%   -66.64%  (p=0.000 n=10+10)
CountByte/4096-4            3.07µs ± 0%     0.31µs ± 2%   -89.99%  (p=0.000 n=9+10)
CountByte/4194304-4         3.48ms ± 1%     0.34ms ± 1%   -90.09%  (p=0.000 n=10+9)
CountByte/67108864-4        55.6ms ± 1%      7.0ms ± 0%   -87.49%  (p=0.000 n=9+8)

name                      old speed      new speed       delta
CountByte/10-4             380MB/s ± 1%    919MB/s ± 1%  +142.21%  (p=0.000 n=9+9)
CountByte/32-4             750MB/s ± 0%   2247MB/s ± 0%  +199.62%  (p=0.000 n=10+10)
CountByte/4096-4          1.33GB/s ± 0%  13.32GB/s ± 2%  +898.13%  (p=0.000 n=9+10)
CountByte/4194304-4       1.21GB/s ± 1%  12.17GB/s ± 1%  +908.87%  (p=0.000 n=10+9)
CountByte/67108864-4      1.21GB/s ± 1%   9.65GB/s ± 0%  +699.29%  (p=0.000 n=9+8)

Fixes #19411

Change-Id: I8d2d409f0fa6df6d03b60790aa86e540b4a4e3b0
Reviewed-on: https://go-review.googlesource.com/38693
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2017-03-27 13:22:59 +02:00
+								func BenchmarkCountByte(b *testing.B) {
 									indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
 									benchStr := Repeat(benchmarkString,
 										(indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
 									benchFunc := func(b *testing.B, benchStr string) {
 										b.SetBytes(int64(len(benchStr)))
 										for i := 0; i < b.N; i++ {
 											Count(benchStr, "=")
 										}
 									}
 									for _, size := range indexSizes {
 										b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
 											benchFunc(b, benchStr[:size])
 										})
 									}
 								}
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
+								var makeFieldsInput = func() string {
 									x := make([]byte, 1<<20)
-												gofmt: apply gofmt -w src misc

Remove trailing whitespace in comments.
No other changes.

R=r
CC=golang-dev
https://golang.org/cl/6815053

											
										
										
											2012-10-30 13:38:01 -07:00
+									// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
+									for i := range x {
 										switch rand.Intn(10) {
 										case 0:
 											x[i] = ' '
 										case 1:
 											if i > 0 && x[i-1] == 'x' {
 												copy(x[i-1:], "χ")
 												break
 											}
 											fallthrough
 										default:
 											x[i] = 'x'
 										}
 									}
 									return string(x)
 								}
-												strings: speed up Fields

- use a string lookup to detect if a single byte is a space character
- determine the exact number of fields for ASCII and
  a possibly underestimated number of fields for non ASCII strings
  by doing a separate byte for byte scan of the input string
  before collecting the fields in an extra pass
- provide a fast path for ASCII only strings when collecting the fields
- avoid utf8.DecodeRuneInString and unicode.IsSpace for ASCII characters

Used golang.org/cl/33108 from Joe Tsai as starting point.

name                      old time/op    new time/op     delta
Fields/ASCII/16              284ns ± 1%      116ns ± 2%   -59.30%  (p=0.000 n=9+10)
Fields/ASCII/256            3.81µs ± 1%     0.80µs ± 1%   -79.10%  (p=0.000 n=10+10)
Fields/ASCII/4096           61.4µs ± 1%     12.3µs ± 1%   -79.96%  (p=0.000 n=10+9)
Fields/ASCII/65536           982µs ± 1%      235µs ± 0%   -76.04%  (p=0.000 n=10+9)
Fields/ASCII/1048576        16.7ms ± 2%      5.4ms ± 1%   -67.52%  (p=0.000 n=10+10)
Fields/Mixed/16              314ns ± 1%      168ns ± 1%   -46.33%  (p=0.000 n=9+10)
Fields/Mixed/256            3.92µs ± 1%     1.17µs ± 1%   -70.19%  (p=0.000 n=10+10)
Fields/Mixed/4096           69.1µs ± 1%     19.0µs ± 1%   -72.53%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.12ms ± 1%     0.39ms ± 0%   -65.37%  (p=0.000 n=10+9)
Fields/Mixed/1048576        19.0ms ± 2%      7.3ms ± 4%   -61.75%  (p=0.000 n=10+9)

name                      old speed      new speed       delta
Fields/ASCII/16           56.3MB/s ± 1%  138.1MB/s ± 2%  +145.31%  (p=0.000 n=9+10)
Fields/ASCII/256          67.1MB/s ± 1%  321.0MB/s ± 1%  +378.26%  (p=0.000 n=10+10)
Fields/ASCII/4096         66.7MB/s ± 1%  333.0MB/s ± 1%  +398.97%  (p=0.000 n=10+9)
Fields/ASCII/65536        66.7MB/s ± 1%  278.4MB/s ± 0%  +317.39%  (p=0.000 n=10+9)
Fields/ASCII/1048576      62.7MB/s ± 2%  192.9MB/s ± 1%  +207.82%  (p=0.000 n=10+10)
Fields/Mixed/16           51.0MB/s ± 2%   94.9MB/s ± 1%   +85.87%  (p=0.000 n=10+10)
Fields/Mixed/256          65.4MB/s ± 1%  219.2MB/s ± 1%  +235.33%  (p=0.000 n=10+10)
Fields/Mixed/4096         59.3MB/s ± 1%  215.7MB/s ± 1%  +263.98%  (p=0.000 n=10+10)
Fields/Mixed/65536        58.6MB/s ± 1%  169.1MB/s ± 0%  +188.73%  (p=0.000 n=10+9)
Fields/Mixed/1048576      55.1MB/s ± 2%  144.0MB/s ± 4%  +161.44%  (p=0.000 n=10+9)

Updates #19789
Updates #17856

Change-Id: If2ce1479542702e9cd65a82a462ba55ac8eb3876
Reviewed-on: https://go-review.googlesource.com/37959
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-03-06 09:34:39 +01:00
+								var makeFieldsInputASCII = func() string {
 									x := make([]byte, 1<<20)
 									// Input is ~10% space, rest ASCII non-space.
 									for i := range x {
 										if rand.Intn(10) == 0 {
 											x[i] = ' '
 										} else {
 											x[i] = 'x'
 										}
 									}
 									return string(x)
 								}
 								var stringdata = []struct{ name, data string }{
 									{"ASCII", makeFieldsInputASCII()},
 									{"Mixed", makeFieldsInput()},
 								}
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
 								func BenchmarkFields(b *testing.B) {
-												strings: speed up Fields

- use a string lookup to detect if a single byte is a space character
- determine the exact number of fields for ASCII and
  a possibly underestimated number of fields for non ASCII strings
  by doing a separate byte for byte scan of the input string
  before collecting the fields in an extra pass
- provide a fast path for ASCII only strings when collecting the fields
- avoid utf8.DecodeRuneInString and unicode.IsSpace for ASCII characters

Used golang.org/cl/33108 from Joe Tsai as starting point.

name                      old time/op    new time/op     delta
Fields/ASCII/16              284ns ± 1%      116ns ± 2%   -59.30%  (p=0.000 n=9+10)
Fields/ASCII/256            3.81µs ± 1%     0.80µs ± 1%   -79.10%  (p=0.000 n=10+10)
Fields/ASCII/4096           61.4µs ± 1%     12.3µs ± 1%   -79.96%  (p=0.000 n=10+9)
Fields/ASCII/65536           982µs ± 1%      235µs ± 0%   -76.04%  (p=0.000 n=10+9)
Fields/ASCII/1048576        16.7ms ± 2%      5.4ms ± 1%   -67.52%  (p=0.000 n=10+10)
Fields/Mixed/16              314ns ± 1%      168ns ± 1%   -46.33%  (p=0.000 n=9+10)
Fields/Mixed/256            3.92µs ± 1%     1.17µs ± 1%   -70.19%  (p=0.000 n=10+10)
Fields/Mixed/4096           69.1µs ± 1%     19.0µs ± 1%   -72.53%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.12ms ± 1%     0.39ms ± 0%   -65.37%  (p=0.000 n=10+9)
Fields/Mixed/1048576        19.0ms ± 2%      7.3ms ± 4%   -61.75%  (p=0.000 n=10+9)

name                      old speed      new speed       delta
Fields/ASCII/16           56.3MB/s ± 1%  138.1MB/s ± 2%  +145.31%  (p=0.000 n=9+10)
Fields/ASCII/256          67.1MB/s ± 1%  321.0MB/s ± 1%  +378.26%  (p=0.000 n=10+10)
Fields/ASCII/4096         66.7MB/s ± 1%  333.0MB/s ± 1%  +398.97%  (p=0.000 n=10+9)
Fields/ASCII/65536        66.7MB/s ± 1%  278.4MB/s ± 0%  +317.39%  (p=0.000 n=10+9)
Fields/ASCII/1048576      62.7MB/s ± 2%  192.9MB/s ± 1%  +207.82%  (p=0.000 n=10+10)
Fields/Mixed/16           51.0MB/s ± 2%   94.9MB/s ± 1%   +85.87%  (p=0.000 n=10+10)
Fields/Mixed/256          65.4MB/s ± 1%  219.2MB/s ± 1%  +235.33%  (p=0.000 n=10+10)
Fields/Mixed/4096         59.3MB/s ± 1%  215.7MB/s ± 1%  +263.98%  (p=0.000 n=10+10)
Fields/Mixed/65536        58.6MB/s ± 1%  169.1MB/s ± 0%  +188.73%  (p=0.000 n=10+9)
Fields/Mixed/1048576      55.1MB/s ± 2%  144.0MB/s ± 4%  +161.44%  (p=0.000 n=10+9)

Updates #19789
Updates #17856

Change-Id: If2ce1479542702e9cd65a82a462ba55ac8eb3876
Reviewed-on: https://go-review.googlesource.com/37959
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-03-06 09:34:39 +01:00
+									for _, sd := range stringdata {
 										b.Run(sd.name, func(b *testing.B) {
 											for j := 1 << 4; j <= 1<<20; j <<= 4 {
 												b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
 													b.ReportAllocs()
 													b.SetBytes(int64(j))
 													data := sd.data[:j]
 													for i := 0; i < b.N; i++ {
 														Fields(data)
 													}
 												})
 											}
 										})
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
+									}
 								}
 								func BenchmarkFieldsFunc(b *testing.B) {
-												strings: speed up Fields

- use a string lookup to detect if a single byte is a space character
- determine the exact number of fields for ASCII and
  a possibly underestimated number of fields for non ASCII strings
  by doing a separate byte for byte scan of the input string
  before collecting the fields in an extra pass
- provide a fast path for ASCII only strings when collecting the fields
- avoid utf8.DecodeRuneInString and unicode.IsSpace for ASCII characters

Used golang.org/cl/33108 from Joe Tsai as starting point.

name                      old time/op    new time/op     delta
Fields/ASCII/16              284ns ± 1%      116ns ± 2%   -59.30%  (p=0.000 n=9+10)
Fields/ASCII/256            3.81µs ± 1%     0.80µs ± 1%   -79.10%  (p=0.000 n=10+10)
Fields/ASCII/4096           61.4µs ± 1%     12.3µs ± 1%   -79.96%  (p=0.000 n=10+9)
Fields/ASCII/65536           982µs ± 1%      235µs ± 0%   -76.04%  (p=0.000 n=10+9)
Fields/ASCII/1048576        16.7ms ± 2%      5.4ms ± 1%   -67.52%  (p=0.000 n=10+10)
Fields/Mixed/16              314ns ± 1%      168ns ± 1%   -46.33%  (p=0.000 n=9+10)
Fields/Mixed/256            3.92µs ± 1%     1.17µs ± 1%   -70.19%  (p=0.000 n=10+10)
Fields/Mixed/4096           69.1µs ± 1%     19.0µs ± 1%   -72.53%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.12ms ± 1%     0.39ms ± 0%   -65.37%  (p=0.000 n=10+9)
Fields/Mixed/1048576        19.0ms ± 2%      7.3ms ± 4%   -61.75%  (p=0.000 n=10+9)

name                      old speed      new speed       delta
Fields/ASCII/16           56.3MB/s ± 1%  138.1MB/s ± 2%  +145.31%  (p=0.000 n=9+10)
Fields/ASCII/256          67.1MB/s ± 1%  321.0MB/s ± 1%  +378.26%  (p=0.000 n=10+10)
Fields/ASCII/4096         66.7MB/s ± 1%  333.0MB/s ± 1%  +398.97%  (p=0.000 n=10+9)
Fields/ASCII/65536        66.7MB/s ± 1%  278.4MB/s ± 0%  +317.39%  (p=0.000 n=10+9)
Fields/ASCII/1048576      62.7MB/s ± 2%  192.9MB/s ± 1%  +207.82%  (p=0.000 n=10+10)
Fields/Mixed/16           51.0MB/s ± 2%   94.9MB/s ± 1%   +85.87%  (p=0.000 n=10+10)
Fields/Mixed/256          65.4MB/s ± 1%  219.2MB/s ± 1%  +235.33%  (p=0.000 n=10+10)
Fields/Mixed/4096         59.3MB/s ± 1%  215.7MB/s ± 1%  +263.98%  (p=0.000 n=10+10)
Fields/Mixed/65536        58.6MB/s ± 1%  169.1MB/s ± 0%  +188.73%  (p=0.000 n=10+9)
Fields/Mixed/1048576      55.1MB/s ± 2%  144.0MB/s ± 4%  +161.44%  (p=0.000 n=10+9)

Updates #19789
Updates #17856

Change-Id: If2ce1479542702e9cd65a82a462ba55ac8eb3876
Reviewed-on: https://go-review.googlesource.com/37959
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

											
										
										
											2017-03-06 09:34:39 +01:00
+									for _, sd := range stringdata {
 										b.Run(sd.name, func(b *testing.B) {
 											for j := 1 << 4; j <= 1<<20; j <<= 4 {
 												b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
 													b.ReportAllocs()
 													b.SetBytes(int64(j))
 													data := sd.data[:j]
 													for i := 0; i < b.N; i++ {
 														FieldsFunc(data, unicode.IsSpace)
 													}
 												})
 											}
 										})
-												bytes, strings: add Fields benchmarks

The performance changes will be a few different CLs.
Start with benchmarks as a baseline.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/6537043

											
										
										
											2012-09-18 15:02:08 -04:00
+									}
 								}
-												strings: remove allocations in Split(s, "")

BenchmarkSplit1     77984460     24131380  -69.06%

R=golang-dev, rsc, minux.ma, dave, extemporalgenome
CC=golang-dev
https://golang.org/cl/7458043

											
										
										
											2013-03-06 15:21:19 -05:00
-												bytes, strings: optimize Split*

The relevant benchmark results on linux/amd64:

bytes:

SplitSingleByteSeparator-4   25.7ms ± 5%   9.1ms ± 4%  -64.40%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.8ms ±20%   4.3ms ± 8%  -69.23%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.88µs ± 9%  0.88µs ± 4%  -53.25%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   4.83µs ±10%  1.32µs ± 9%  -72.65%  (p=0.000 n=10+10)

strings:

name                         old time/op  new time/op  delta
SplitSingleByteSeparator-4   21.4ms ± 8%   8.5ms ± 5%  -60.19%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.2ms ± 9%   3.9ms ± 4%  -70.29%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.54µs ± 5%  0.75µs ± 7%  -51.21%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   3.57µs ± 8%  1.01µs ±11%  -71.76%  (p=0.000 n=10+10)

Fixes #18973

Change-Id: Ie4bc010c6cc389001e72eab530497c81e5b26f34
Reviewed-on: https://go-review.googlesource.com/36510
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-02-07 13:38:52 +02:00
+								func BenchmarkSplitEmptySeparator(b *testing.B) {
-												strings: remove allocations in Split(s, "")

BenchmarkSplit1     77984460     24131380  -69.06%

R=golang-dev, rsc, minux.ma, dave, extemporalgenome
CC=golang-dev
https://golang.org/cl/7458043

											
										
										
											2013-03-06 15:21:19 -05:00
+									for i := 0; i < b.N; i++ {
 										Split(benchInputHard, "")
 									}
 								}
-												bytes, strings: optimize Split*

The relevant benchmark results on linux/amd64:

bytes:

SplitSingleByteSeparator-4   25.7ms ± 5%   9.1ms ± 4%  -64.40%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.8ms ±20%   4.3ms ± 8%  -69.23%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.88µs ± 9%  0.88µs ± 4%  -53.25%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   4.83µs ±10%  1.32µs ± 9%  -72.65%  (p=0.000 n=10+10)

strings:

name                         old time/op  new time/op  delta
SplitSingleByteSeparator-4   21.4ms ± 8%   8.5ms ± 5%  -60.19%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.2ms ± 9%   3.9ms ± 4%  -70.29%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.54µs ± 5%  0.75µs ± 7%  -51.21%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   3.57µs ± 8%  1.01µs ±11%  -71.76%  (p=0.000 n=10+10)

Fixes #18973

Change-Id: Ie4bc010c6cc389001e72eab530497c81e5b26f34
Reviewed-on: https://go-review.googlesource.com/36510
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-02-07 13:38:52 +02:00
+								func BenchmarkSplitSingleByteSeparator(b *testing.B) {
-												strings: remove allocations in Split(s, "")

BenchmarkSplit1     77984460     24131380  -69.06%

R=golang-dev, rsc, minux.ma, dave, extemporalgenome
CC=golang-dev
https://golang.org/cl/7458043

											
										
										
											2013-03-06 15:21:19 -05:00
+									for i := 0; i < b.N; i++ {
 										Split(benchInputHard, "/")
 									}
 								}
-												bytes, strings: optimize Split*

The relevant benchmark results on linux/amd64:

bytes:

SplitSingleByteSeparator-4   25.7ms ± 5%   9.1ms ± 4%  -64.40%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.8ms ±20%   4.3ms ± 8%  -69.23%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.88µs ± 9%  0.88µs ± 4%  -53.25%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   4.83µs ±10%  1.32µs ± 9%  -72.65%  (p=0.000 n=10+10)

strings:

name                         old time/op  new time/op  delta
SplitSingleByteSeparator-4   21.4ms ± 8%   8.5ms ± 5%  -60.19%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.2ms ± 9%   3.9ms ± 4%  -70.29%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.54µs ± 5%  0.75µs ± 7%  -51.21%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   3.57µs ± 8%  1.01µs ±11%  -71.76%  (p=0.000 n=10+10)

Fixes #18973

Change-Id: Ie4bc010c6cc389001e72eab530497c81e5b26f34
Reviewed-on: https://go-review.googlesource.com/36510
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-02-07 13:38:52 +02:00
+								func BenchmarkSplitMultiByteSeparator(b *testing.B) {
-												strings: remove allocations in Split(s, "")

BenchmarkSplit1     77984460     24131380  -69.06%

R=golang-dev, rsc, minux.ma, dave, extemporalgenome
CC=golang-dev
https://golang.org/cl/7458043

											
										
										
											2013-03-06 15:21:19 -05:00
+									for i := 0; i < b.N; i++ {
 										Split(benchInputHard, "hello")
 									}
 								}
-												bytes, strings: optimize Repeat

Call copy with as large buffer as possible to reduce the
number of function calls.

benchmark                 old ns/op    new ns/op    delta
BenchmarkBytesRepeat            540          162  -70.00%
BenchmarkStringsRepeat          563          177  -68.56%

LGTM=josharian
R=golang-codereviews, josharian, dave, dvyukov
CC=golang-codereviews
https://golang.org/cl/90550043

											
										
										
											2014-06-11 19:03:59 -07:00
-												bytes, strings: optimize Split*

The relevant benchmark results on linux/amd64:

bytes:

SplitSingleByteSeparator-4   25.7ms ± 5%   9.1ms ± 4%  -64.40%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.8ms ±20%   4.3ms ± 8%  -69.23%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.88µs ± 9%  0.88µs ± 4%  -53.25%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   4.83µs ±10%  1.32µs ± 9%  -72.65%  (p=0.000 n=10+10)

strings:

name                         old time/op  new time/op  delta
SplitSingleByteSeparator-4   21.4ms ± 8%   8.5ms ± 5%  -60.19%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.2ms ± 9%   3.9ms ± 4%  -70.29%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.54µs ± 5%  0.75µs ± 7%  -51.21%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   3.57µs ± 8%  1.01µs ±11%  -71.76%  (p=0.000 n=10+10)

Fixes #18973

Change-Id: Ie4bc010c6cc389001e72eab530497c81e5b26f34
Reviewed-on: https://go-review.googlesource.com/36510
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-02-07 13:38:52 +02:00
+								func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
 									for i := 0; i < b.N; i++ {
 										SplitN(benchInputHard, "/", 10)
 									}
 								}
 								func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
 									for i := 0; i < b.N; i++ {
 										SplitN(benchInputHard, "hello", 10)
 									}
 								}
-												bytes, strings: optimize Repeat

Call copy with as large buffer as possible to reduce the
number of function calls.

benchmark                 old ns/op    new ns/op    delta
BenchmarkBytesRepeat            540          162  -70.00%
BenchmarkStringsRepeat          563          177  -68.56%

LGTM=josharian
R=golang-codereviews, josharian, dave, dvyukov
CC=golang-codereviews
https://golang.org/cl/90550043

											
										
										
											2014-06-11 19:03:59 -07:00
+								func BenchmarkRepeat(b *testing.B) {
 									for i := 0; i < b.N; i++ {
 										Repeat("-", 80)
 									}
 								}
-												bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>

											
										
										
											2016-10-20 03:16:22 -07:00
 								func BenchmarkIndexAnyASCII(b *testing.B) {
 									x := Repeat("#", 4096) // Never matches set
 									cs := "0123456789abcdef"
 									for k := 1; k <= 4096; k <<= 4 {
 										for j := 1; j <= 16; j <<= 1 {
 											b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
 												for i := 0; i < b.N; i++ {
 													IndexAny(x[:k], cs[:j])
 												}
 											})
 										}
 									}
 								}
 								func BenchmarkTrimASCII(b *testing.B) {
 									cs := "0123456789abcdef"
 									for k := 1; k <= 4096; k <<= 4 {
 										for j := 1; j <= 16; j <<= 1 {
 											b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
 												x := Repeat(cs[:j], k) // Always matches set
 												for i := 0; i < b.N; i++ {
 													Trim(x[:k], cs[:j])
 												}
 											})
 										}
 									}
 								}
-												bytes,strings: in generic Index, use mix of IndexByte and Rabin-Karp

Use IndexByte first, as it allows us to skip lots of bytes quickly.
If IndexByte is generating a lot of false positives, switch over to Rabin-Karp.

Experiments for ppc64le
bytes:
name                             old time/op  new time/op  delta
IndexPeriodic/IndexPeriodic2-2   1.12ms ± 0%  0.18ms ± 0%  -83.54%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic4-2    635µs ± 0%   184µs ± 0%  -71.06%  (p=0.000 n=9+9)
IndexPeriodic/IndexPeriodic8-2    289µs ± 0%   184µs ± 0%  -36.51%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic16-2   133µs ± 0%   183µs ± 0%  +37.68%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic32-2  68.3µs ± 0%  70.2µs ± 0%   +2.76%  (p=0.000 n=10+10)
IndexPeriodic/IndexPeriodic64-2  35.8µs ± 0%  36.6µs ± 0%   +2.17%  (p=0.000 n=8+10)

strings:
name                             old time/op  new time/op  delta
IndexPeriodic/IndexPeriodic2-2    184µs ± 0%   184µs ± 0%   +0.11%  (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic4-2    184µs ± 0%   184µs ± 0%     ~     (p=0.886 n=4+4)
IndexPeriodic/IndexPeriodic8-2    184µs ± 0%   184µs ± 0%     ~     (p=0.486 n=4+4)
IndexPeriodic/IndexPeriodic16-2   185µs ± 1%   184µs ± 0%     ~     (p=0.343 n=4+4)
IndexPeriodic/IndexPeriodic32-2   184µs ± 0%    69µs ± 0%  -62.37%  (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic64-2   184µs ± 0%    37µs ± 0%  -80.17%  (p=0.029 n=4+4)

Fixes #22578

Change-Id: If2a4d8554cb96bfd699b58149d13ac294615f8b8
Reviewed-on: https://go-review.googlesource.com/76070
Reviewed-by: Alberto Donizetti <alb.donizetti@gmail.com>

											
										
										
											2017-11-04 10:19:53 -07:00
 								func BenchmarkIndexPeriodic(b *testing.B) {
 									key := "aa"
 									for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
 										b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
 											s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
 											for i := 0; i < b.N; i++ {
 												Index(s, key)
 											}
 										})
 									}
 								}