strings: better mean complexity for Count and Index.

The O(n+m) complexity is obtained probabilistically by using Rabin-Karp algorithm, which provides the needed complexity unless exceptional collisions occur, without memory allocation. benchmark old ns/op new ns/op delta BenchmarkIndexHard1 6532331 4045886 -38.06% BenchmarkIndexHard2 8178173 4038975 -50.61% BenchmarkIndexHard3 6973687 4042591 -42.03% BenchmarkCountHard1 6270864 4071090 -35.08% BenchmarkCountHard2 7838039 4072853 -48.04% BenchmarkCountHard3 6697828 4071964 -39.20% BenchmarkIndexTorture 2730546 28934 -98.94% BenchmarkCountTorture 2729622 29064 -98.94% Fixes #4600. R=rsc, donovanhide, remyoudompheng CC=golang-dev https://golang.org/cl/7314095
2025-12-08 06:10:04 +00:00 · 2013-02-17 13:07:17 +01:00 · 2013-02-17 13:07:17 +01:00 · 23093f86ee
commit 23093f86ee
parent f1037f0e86
2 changed files with 109 additions and 8 deletions
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@ -1001,6 +1001,57 @@ func TestEqualFold(t *testing.T) {
 	}
 }

+func makeBenchInputHard() string {
+	tokens := [...]string{
+		"<a>", "<p>", "<b>", "<strong>",
+		"</a>", "</p>", "</b>", "</strong>",
+		"hello", "world",
+	}
+	x := make([]byte, 0, 1<<20)
+	for len(x) < 1<<20 {
+		i := rand.Intn(len(tokens))
+		x = append(x, tokens[i]...)
+	}
+	return string(x)
+}
+
+var benchInputHard = makeBenchInputHard()
+
+func benchmarkIndexHard(b *testing.B, sep string) {
+	for i := 0; i < b.N; i++ {
+		Index(benchInputHard, sep)
+	}
+}
+
+func benchmarkCountHard(b *testing.B, sep string) {
+	for i := 0; i < b.N; i++ {
+		Count(benchInputHard, sep)
+	}
+}
+
+func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
+func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
+func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
+
+func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
+func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
+func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
+
+var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
+var benchNeedleTorture = Repeat("ABC", 1<<10+1)
+
+func BenchmarkIndexTorture(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		Index(benchInputTorture, benchNeedleTorture)
+	}
+}
+
+func BenchmarkCountTorture(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		Count(benchInputTorture, benchNeedleTorture)
+	}
+}
+
 var makeFieldsInput = func() string {
 	x := make([]byte, 1<<20)
 	// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.