strings: avoid utf8.RuneError mangling in Split

Split should only split strings and not perform mangling
of invalid UTF-8 into utf8.RuneError.
The prior behavior is clearly a bug since mangling is not
performed in any other situation (e.g., when the separator is non-empty).

Fixes #53511

Change-Id: I112a2ef15ee46ddecda015ee14bca04cd76adfbf
Reviewed-on: https://go-review.googlesource.com/c/go/+/413715
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Joe Tsai 2022-06-22 20:57:50 -07:00 committed by Joseph Tsai
parent ced4d6fd2d
commit 9a4685f220
3 changed files with 6 additions and 5 deletions

View file

@ -15,7 +15,7 @@ import (
// explode splits s into a slice of UTF-8 strings,
// one string per Unicode character up to a maximum of n (n < 0 means no limit).
// Invalid UTF-8 sequences become correct encodings of U+FFFD.
// Invalid UTF-8 bytes are sliced individually.
func explode(s string, n int) []string {
l := utf8.RuneCountInString(s)
if n < 0 || n > l {
@ -23,12 +23,9 @@ func explode(s string, n int) []string {
}
a := make([]string, n)
for i := 0; i < n-1; i++ {
ch, size := utf8.DecodeRuneInString(s)
_, size := utf8.DecodeRuneInString(s)
a[i] = s[:size]
s = s[size:]
if ch == utf8.RuneError {
a[i] = string(utf8.RuneError)
}
}
if n > 0 {
a[n-1] = s