2008-09-12 16:12:20 -07:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
2009-03-05 17:02:34 -08:00
|
|
|
// A package of simple functions to manipulate strings.
|
2008-09-12 16:12:20 -07:00
|
|
|
package strings
|
|
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
import (
|
|
|
|
|
"unicode";
|
|
|
|
|
"utf8";
|
|
|
|
|
)
|
2008-09-12 16:12:20 -07:00
|
|
|
|
2009-06-24 19:02:29 -07:00
|
|
|
// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings),
// up to a maximum of n (n <= 0 means no limit).  When the limit is reached, the final
// element holds the unsplit remainder of s exactly as it appears in s.
// Invalid UTF-8 sequences that are split off become correct encodings of U+FFFD.
func explode(s string, n int) []string {
	// s cannot yield more than len(s) pieces, so an absent or oversized
	// limit is clamped to that.  This keeps the allocation below
	// proportional to the input instead of to a caller-supplied n.
	if n <= 0 || n > len(s) {
		n = len(s);
	}
	a := make([]string, n);
	na := 0;
	for len(s) > 0 {
		if na+1 >= n {
			// Only one slot left: it receives everything that remains.
			a[na] = s;
			na++;
			break
		}
		// The types of r and size are inferred from the decoder.
		r, size := utf8.DecodeRuneInString(s);
		s = s[size:len(s)];
		// Re-encoding the decoded value is what turns a bad byte into U+FFFD.
		a[na] = string(r);
		na++;
	}
	return a[0:na]
}
|
|
|
|
|
|
2009-03-05 17:02:34 -08:00
|
|
|
// Count reports how many non-overlapping occurrences of sep appear in s.
// If sep is empty, the result is utf8.RuneCountInString(s) plus one.
func Count(s, sep string) int {
	width := len(sep);
	if width == 0 {
		return utf8.RuneCountInString(s)+1
	}
	first := sep[0];
	limit := len(s) - width;	// last offset at which sep still fits in s
	total := 0;
	pos := 0;
	for pos <= limit {
		// Cheap first-byte test before the full substring comparison.
		if s[pos] != first || (width > 1 && s[pos:pos+width] != sep) {
			pos++;
			continue
		}
		total++;
		// Resume after the match so that occurrences never overlap.
		pos += width;
	}
	return total
}
|
|
|
|
|
|
2009-04-07 00:32:16 -07:00
|
|
|
// Index returns the byte offset of the first occurrence of sep in s,
// or -1 when s does not contain sep.  An empty sep is found at offset 0.
func Index(s, sep string) int {
	width := len(sep);
	if width == 0 {
		return 0
	}
	first := sep[0];
	last := len(s) - width;	// highest offset at which sep still fits in s
	for pos := 0; pos <= last; pos++ {
		// Skip quickly over positions whose first byte cannot match.
		if s[pos] != first {
			continue
		}
		if width == 1 || s[pos:pos+width] == sep {
			return pos
		}
	}
	return -1
}
|
2009-06-09 10:58:58 -07:00
|
|
|
|
|
|
|
|
// LastIndex returns the byte offset of the last occurrence of sep in s,
// or -1 when s does not contain sep.  An empty sep is found at offset len(s).
func LastIndex(s, sep string) int {
	width := len(sep);
	if width == 0 {
		return len(s)
	}
	first := sep[0];
	// Walk backwards from the highest offset at which sep still fits.
	pos := len(s) - width;
	for pos >= 0 {
		if s[pos] == first {
			if width == 1 || s[pos:pos+width] == sep {
				return pos
			}
		}
		pos--;
	}
	return -1
}
|
2008-09-12 16:12:20 -07:00
|
|
|
|
2009-06-24 19:02:29 -07:00
|
|
|
// Split splits the string s around each instance of sep, returning an array of substrings of s.
|
|
|
|
|
// If sep is empty, Split splits s after each UTF-8 sequence.
|
|
|
|
|
// If n > 0, split Splits s into at most n substrings; the last subarray will contain an unsplit remainder string.
|
|
|
|
|
func Split(s, sep string, n int) []string {
|
2008-09-12 16:12:20 -07:00
|
|
|
if sep == "" {
|
2009-06-24 19:02:29 -07:00
|
|
|
return explode(s, n)
|
|
|
|
|
}
|
|
|
|
|
if n <= 0 {
|
|
|
|
|
n = Count(s, sep) + 1;
|
2008-09-12 16:12:20 -07:00
|
|
|
}
|
|
|
|
|
c := sep[0];
|
|
|
|
|
start := 0;
|
2009-01-06 15:19:02 -08:00
|
|
|
a := make([]string, n);
|
2008-09-12 16:12:20 -07:00
|
|
|
na := 0;
|
2009-06-24 19:02:29 -07:00
|
|
|
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
|
2008-09-12 16:12:20 -07:00
|
|
|
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
|
|
|
|
|
a[na] = s[start:i];
|
|
|
|
|
na++;
|
|
|
|
|
start = i+len(sep);
|
2009-06-24 19:02:29 -07:00
|
|
|
i += len(sep)-1;
|
2008-09-12 16:12:20 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
a[na] = s[start:len(s)];
|
2009-06-24 19:02:29 -07:00
|
|
|
return a[0:na+1]
|
2008-09-12 16:12:20 -07:00
|
|
|
}
|
2008-12-04 21:00:34 -08:00
|
|
|
|
2009-03-05 17:02:34 -08:00
|
|
|
// Join builds a single string from the elements of a, writing the
// separator sep between each pair of adjacent elements.
func Join(a []string, sep string) string {
	switch len(a) {
	case 0:
		return ""
	case 1:
		return a[0]
	}

	// Work out the exact size of the result: every element plus one
	// separator between each adjacent pair.
	total := len(sep) * (len(a)-1);
	for _, elem := range a {
		total += len(elem);
	}

	buf := make([]byte, total);
	w := 0;	// next write position in buf
	for idx, elem := range a {
		if idx > 0 {
			for k := 0; k < len(sep); k++ {
				buf[w] = sep[k];
				w++;
			}
		}
		for k := 0; k < len(elem); k++ {
			buf[w] = elem[k];
			w++;
		}
	}
	return string(buf)
}
|
2009-04-13 16:50:42 -07:00
|
|
|
|
|
|
|
|
// HasPrefix reports whether the string s starts with prefix.
func HasPrefix(s, prefix string) bool {
	// A prefix longer than s can never match, and slicing would be out of range.
	if len(prefix) > len(s) {
		return false
	}
	return s[0:len(prefix)] == prefix
}
|
|
|
|
|
|
|
|
|
|
// HasSuffix reports whether the string s finishes with suffix.
func HasSuffix(s, suffix string) bool {
	// A suffix longer than s can never match, and slicing would be out of range.
	if len(suffix) > len(s) {
		return false
	}
	tail := len(s) - len(suffix);	// offset at which the candidate suffix begins
	return s[tail:len(s)] == suffix
}
|
2009-06-05 13:09:03 -07:00
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
// Map returns a copy of the string s with all its characters modified
|
2009-09-01 13:46:59 -07:00
|
|
|
// according to the mapping function.
|
2009-09-01 11:06:28 -07:00
|
|
|
func Map(mapping func(rune int) int, s string) string {
|
|
|
|
|
// In the worst case, the string can grow when mapped, making
|
|
|
|
|
// things unpleasant. But it's so rare we barge in assuming it's
|
|
|
|
|
// fine. It could also shrink but that falls out naturally.
|
|
|
|
|
maxbytes := len(s); // length of b
|
|
|
|
|
nbytes := 0; // number of bytes encoded in b
|
|
|
|
|
b := make([]byte, maxbytes);
|
2009-09-15 09:41:59 -07:00
|
|
|
for _, c := range s {
|
2009-09-01 11:06:28 -07:00
|
|
|
rune := mapping(c);
|
|
|
|
|
wid := 1;
|
|
|
|
|
if rune >= utf8.RuneSelf {
|
|
|
|
|
wid = utf8.RuneLen(rune);
|
2009-06-05 13:09:03 -07:00
|
|
|
}
|
2009-09-01 11:06:28 -07:00
|
|
|
if nbytes + wid > maxbytes {
|
|
|
|
|
// Grow the buffer.
|
|
|
|
|
maxbytes = maxbytes*2 + utf8.UTFMax;
|
|
|
|
|
nb := make([]byte, maxbytes);
|
|
|
|
|
for i, c := range b[0:nbytes] {
|
|
|
|
|
nb[i] = c
|
|
|
|
|
}
|
|
|
|
|
b = nb;
|
|
|
|
|
}
|
|
|
|
|
nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
|
2009-06-05 13:09:03 -07:00
|
|
|
}
|
2009-09-01 11:06:28 -07:00
|
|
|
return string(b[0:nbytes]);
|
2009-06-05 13:09:03 -07:00
|
|
|
}
|
|
|
|
|
|
2009-09-01 13:46:59 -07:00
|
|
|
// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
|
2009-09-01 11:06:28 -07:00
|
|
|
func ToUpper(s string) string {
|
|
|
|
|
return Map(unicode.ToUpper, s)
|
2009-06-05 13:09:03 -07:00
|
|
|
}
|
|
|
|
|
|
2009-09-01 13:46:59 -07:00
|
|
|
// ToUpper returns a copy of the string s with all Unicode letters mapped to their lower case.
|
2009-09-01 11:06:28 -07:00
|
|
|
func ToLower(s string) string {
|
|
|
|
|
return Map(unicode.ToLower, s)
|
2009-06-05 13:09:03 -07:00
|
|
|
}
|
|
|
|
|
|
2009-09-01 13:46:59 -07:00
|
|
|
// ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
|
2009-10-10 18:56:13 -07:00
|
|
|
func ToTitle(s string) string {
|
2009-09-01 11:06:28 -07:00
|
|
|
return Map(unicode.ToTitle, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TrimSpace returns a slice of the string s, with all leading and trailing white space
// removed, as defined by Unicode (unicode.IsSpace).  Bytes that do not form a
// complete UTF-8 encoding are never treated as white space, so trimming stops
// when it reaches them.
func TrimSpace(s string) string {
	start, end := 0, len(s);

	// Strip leading white space one character at a time.
	for start < end {
		r, wid := utf8.DecodeRuneInString(s[start:end]);
		if !unicode.IsSpace(r) {
			break;
		}
		start += wid;
	}

	// Strip trailing white space.
	for start < end {
		wid := 1;
		if s[end-1] >= utf8.RuneSelf {
			// Back up carefully looking for beginning of rune. Mustn't pass start.
			for wid = 2; start <= end-wid && !utf8.RuneStart(s[end-wid]); wid++ {
			}
			if start > end-wid {	// invalid UTF-8 sequence; stop processing
				return s[start:end]
			}
		}
		r, n := utf8.DecodeRuneInString(s[end-wid:end]);
		// The decoded character must account for every byte that was backed
		// over.  If it is shorter, the tail of s holds stray bytes after it;
		// removing only n bytes from the end would then discard those stray
		// bytes and cut the character itself in half.
		if n != wid || !unicode.IsSpace(r) {
			break;
		}
		end -= wid;
	}
	return s[start:end];
}
|
2009-06-29 15:24:23 -07:00
|
|
|
|
2009-09-01 11:06:28 -07:00
|
|
|
// Bytes allocates a new byte slice of length len(s) and fills it with
// the bytes of s.
func Bytes(s string) []byte {
	out := make([]byte, len(s));
	// Index by position in out: ranging over s itself would step by
	// character rather than by byte.
	for i := range out {
		out[i] = s[i];
	}
	return out
}
|