mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
regexp: new regularized methods for matching.
The previous set was spotty, incomplete, and confusing. This CL proposes a regular, clean set with clearer names. It's also complete. Many existing methods will be deprecated, but not in this CL. Ditto for the tests. R=rsc, gri CC=golang-dev, rog https://golang.org/cl/1946041
This commit is contained in:
parent
d75074974a
commit
6610d79eda
2 changed files with 770 additions and 13 deletions
|
|
@ -19,11 +19,41 @@
|
|||
// '[' [ '^' ] { character-range } ']'
|
||||
// '(' regexp ')'
|
||||
// character-range:
|
||||
// character '-' character
|
||||
// character [ '-' character ]
|
||||
//
|
||||
// All characters are UTF-8-encoded code points.
|
||||
// Backslashes escape special characters, including inside
|
||||
// character classes.
|
||||
// All characters are UTF-8-encoded code points. Backslashes escape special
|
||||
// characters, including inside character classes.
|
||||
//
|
||||
// There are 16 methods of Regexp that match a regular expression and identify
|
||||
// the matched text. Their names are matched by this regular expression:
|
||||
//
|
||||
// Find(All)?(String)?(Submatch)?(Index)?
|
||||
//
|
||||
// If 'All' is present, the routine matches successive non-overlapping
|
||||
// matches of the entire expression. Empty matches abutting a preceding
|
||||
// match are ignored. The return value is a slice containing the successive
|
||||
// return values of the corresponding non-'All' routine. These routines take
|
||||
// an extra integer argument, n; if n >= 0, the function returns at most n
|
||||
// matches/submatches.
|
||||
//
|
||||
// If 'String' is present, the argument is a string; otherwise it is a slice
|
||||
// of bytes; return values are adjusted as appropriate.
|
||||
//
|
||||
// If 'Submatch' is present, the return value is a slice identifying the
|
||||
// successive submatches of the expression. Submatches are matches of
|
||||
// parenthesized subexpressions within the regular expression, numbered from
|
||||
// left to right in order of opening parenthesis. Submatch 0 is the match of
|
||||
// the entire expression, submatch 1 the match of the first parenthesized
|
||||
// subexpression, and so on.
|
||||
//
|
||||
// If 'Index' is present, matches and submatches are identified by byte index
|
||||
// pairs within the input string: result[2*n:2*n+1] identifies the indexes of
|
||||
// the nth submatch. The pair for n==0 identifies the match of the entire
|
||||
// expression. If 'Index' is not present, the match is identified by the
|
||||
// text of the match/submatch. If an index is negative, it means that
|
||||
// subexpression did not match any string in the input.
|
||||
//
|
||||
// (There are a few other methods that do not match this pattern.)
|
||||
//
|
||||
package regexp
|
||||
|
||||
|
|
@ -814,7 +844,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||
advance = bytes.Index(bytestr[pos:], re.prefixBytes)
|
||||
}
|
||||
if advance == -1 {
|
||||
return []int{}
|
||||
return nil
|
||||
}
|
||||
pos += advance + len(re.prefix)
|
||||
prefixed = true
|
||||
|
|
@ -914,6 +944,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||
// s[a[2*i]:a[2*i+1]] for i > 0 is the substring matched by the ith parenthesized subexpression.
|
||||
// A negative value means the subexpression did not match any element of the string.
|
||||
// An empty array means "no match".
|
||||
// Deprecated; use FindString.
|
||||
func (re *Regexp) ExecuteString(s string) (a []int) {
|
||||
return re.doExecute(s, nil, 0)
|
||||
}
|
||||
|
|
@ -926,6 +957,7 @@ func (re *Regexp) ExecuteString(s string) (a []int) {
|
|||
// b[a[2*i]:a[2*i+1]] for i > 0 is the subslice matched by the ith parenthesized subexpression.
|
||||
// A negative value means the subexpression did not match any element of the slice.
|
||||
// An empty array means "no match".
|
||||
// Deprecated; use Find.
|
||||
func (re *Regexp) Execute(b []byte) (a []int) { return re.doExecute("", b, 0) }
|
||||
|
||||
|
||||
|
|
@ -944,6 +976,7 @@ func (re *Regexp) Match(b []byte) bool { return len(re.doExecute("", b, 0)) > 0
|
|||
// a[0] is the substring matched by the entire expression.
|
||||
// a[i] for i > 0 is the substring matched by the ith parenthesized subexpression.
|
||||
// An empty array means ``no match''.
|
||||
// Deprecated; use FindStringSubmatch.
|
||||
func (re *Regexp) MatchStrings(s string) (a []string) {
|
||||
r := re.doExecute(s, nil, 0)
|
||||
if r == nil {
|
||||
|
|
@ -963,6 +996,7 @@ func (re *Regexp) MatchStrings(s string) (a []string) {
|
|||
// a[0] is the subslice matched by the entire expression.
|
||||
// a[i] for i > 0 is the subslice matched by the ith parenthesized subexpression.
|
||||
// An empty array means ``no match''.
|
||||
// Deprecated; use FindSubmatch.
|
||||
func (re *Regexp) MatchSlices(b []byte) (a [][]byte) {
|
||||
r := re.doExecute("", b, 0)
|
||||
if r == nil {
|
||||
|
|
@ -1123,7 +1157,7 @@ func QuoteMeta(s string) string {
|
|||
}
|
||||
|
||||
// Find matches in slice b if b is non-nil, otherwise find matches in string s.
|
||||
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int)) {
|
||||
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
|
||||
var end int
|
||||
if b == nil {
|
||||
end = len(s)
|
||||
|
|
@ -1162,7 +1196,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int))
|
|||
prevMatchEnd = matches[1]
|
||||
|
||||
if accept {
|
||||
deliver(matches[0], matches[1])
|
||||
deliver(matches)
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
|
@ -1173,14 +1207,18 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int))
|
|||
// matches. Text that does not match the expression will be skipped. Empty
|
||||
// matches abutting a preceding match are ignored. The function returns a slice
|
||||
// containing the matching substrings.
|
||||
// Deprecated; use FindAll.
|
||||
func (re *Regexp) AllMatches(b []byte, n int) [][]byte {
|
||||
if n <= 0 {
|
||||
n = len(b) + 1
|
||||
}
|
||||
// TODO: here and below, grow the result slice on demand
|
||||
// to avoid allocating a huge slice for a small result and
|
||||
// wasting memory.
|
||||
result := make([][]byte, n)
|
||||
i := 0
|
||||
re.allMatches("", b, n, func(start, end int) {
|
||||
result[i] = b[start:end]
|
||||
re.allMatches("", b, n, func(match []int) {
|
||||
result[i] = b[match[0]:match[1]]
|
||||
i++
|
||||
})
|
||||
return result[0:i]
|
||||
|
|
@ -1191,14 +1229,15 @@ func (re *Regexp) AllMatches(b []byte, n int) [][]byte {
|
|||
// matches. Text that does not match the expression will be skipped. Empty
|
||||
// matches abutting a preceding match are ignored. The function returns a slice
|
||||
// containing the matching substrings.
|
||||
// Deprecated; use FindAllString.
|
||||
func (re *Regexp) AllMatchesString(s string, n int) []string {
|
||||
if n <= 0 {
|
||||
n = len(s) + 1
|
||||
}
|
||||
result := make([]string, n)
|
||||
i := 0
|
||||
re.allMatches(s, nil, n, func(start, end int) {
|
||||
result[i] = s[start:end]
|
||||
re.allMatches(s, nil, n, func(match []int) {
|
||||
result[i] = s[match[0]:match[1]]
|
||||
i++
|
||||
})
|
||||
return result[0:i]
|
||||
|
|
@ -1215,7 +1254,7 @@ func (re *Regexp) AllMatchesIter(b []byte, n int) <-chan []byte {
|
|||
}
|
||||
c := make(chan []byte, 10)
|
||||
go func() {
|
||||
re.allMatches("", b, n, func(start, end int) { c <- b[start:end] })
|
||||
re.allMatches("", b, n, func(match []int) { c <- b[match[0]:match[1]] })
|
||||
close(c)
|
||||
}()
|
||||
return c
|
||||
|
|
@ -1232,8 +1271,284 @@ func (re *Regexp) AllMatchesStringIter(s string, n int) <-chan string {
|
|||
}
|
||||
c := make(chan string, 10)
|
||||
go func() {
|
||||
re.allMatches(s, nil, n, func(start, end int) { c <- s[start:end] })
|
||||
re.allMatches(s, nil, n, func(match []int) { c <- s[match[0]:match[1]] })
|
||||
close(c)
|
||||
}()
|
||||
return c
|
||||
}
|
||||
|
||||
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) Find(b []byte) []byte {
|
||||
a := re.doExecute("", b, 0)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
return b[a[0]:a[1]]
|
||||
}
|
||||
|
||||
// FindIndex returns a two-element slice of integers defining the location of
|
||||
// the leftmost match in b of the regular expression. The match itself is at
|
||||
// b[loc[0]:loc[1]].
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
||||
a := re.doExecute("", b, 0)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
return a[0:2]
|
||||
}
|
||||
|
||||
// FindString returns a string holding the text of the leftmost match in s of the regular
|
||||
// expression. If there is no match, the return value is an empty string,
|
||||
// but it will also be empty if the regular expression successfully matches
|
||||
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
||||
// necessary to distinguish these cases.
|
||||
func (re *Regexp) FindString(s string) string {
|
||||
a := re.doExecute(s, nil, 0)
|
||||
if a == nil {
|
||||
return ""
|
||||
}
|
||||
return s[a[0]:a[1]]
|
||||
}
|
||||
|
||||
// FindStringIndex returns a two-element slice of integers defining the
|
||||
// location of the leftmost match in s of the regular expression. The match
|
||||
// itself is at s[loc[0]:loc[1]].
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindStringIndex(s string) []int {
|
||||
a := re.doExecute(s, nil, 0)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
return a[0:2]
|
||||
}
|
||||
|
||||
// FindSubmatch returns a slice of slices holding the text of the leftmost
|
||||
// match of the regular expression in b and the matches, if any, of its
|
||||
// subexpressions, as defined by the 'Submatch' descriptions in the package
|
||||
// comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
||||
a := re.doExecute("", b, 0)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
ret := make([][]byte, len(a)/2)
|
||||
for i := range ret {
|
||||
if a[2*i] >= 0 {
|
||||
ret[i] = b[a[2*i]:a[2*i+1]]
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// FindSubmatchIndex returns a slice holding the index pairs identifying the
|
||||
// leftmost match of the regular expression in b and the matches, if any, of
|
||||
// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
|
||||
// in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
||||
return re.doExecute("", b, 0)
|
||||
}
|
||||
|
||||
// FindStringSubmatch returns a slice of strings holding the text of the
|
||||
// leftmost match of the regular expression in s and the matches, if any, of
|
||||
// its subexpressions, as defined by the 'Submatch' description in the
|
||||
// package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindStringSubmatch(s string) []string {
|
||||
a := re.doExecute(s, nil, 0)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
ret := make([]string, len(a)/2)
|
||||
for i := range ret {
|
||||
if a[2*i] >= 0 {
|
||||
ret[i] = s[a[2*i]:a[2*i+1]]
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// FindStringSubmatchIndex returns a slice holding the index pairs
|
||||
// identifying the leftmost match of the regular expression in s and the
|
||||
// matches, if any, of its subexpressions, as defined by the 'Submatch' and
|
||||
// 'Index' descriptions in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
||||
return re.doExecute(s, nil, 0)
|
||||
}
|
||||
|
||||
// FindAll is the 'All' version of Find; it returns a slice of all successive
|
||||
// matches of the expression, as defined by the 'All' description in the
|
||||
// package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAll(b []byte, n int) [][]byte {
|
||||
if n < 0 {
|
||||
n = len(b) + 1
|
||||
}
|
||||
result := make([][]byte, n)
|
||||
i := 0
|
||||
re.allMatches("", b, n, func(match []int) {
|
||||
result[i] = b[match[0]:match[1]]
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
|
||||
// successive matches of the expression, as defined by the 'All' description
|
||||
// in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
|
||||
if n < 0 {
|
||||
n = len(b) + 1
|
||||
}
|
||||
result := make([][]int, n)
|
||||
i := 0
|
||||
re.allMatches("", b, n, func(match []int) {
|
||||
result[i] = match[0:2]
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllString is the 'All' version of FindString; it returns a slice of all
|
||||
// successive matches of the expression, as defined by the 'All' description
|
||||
// in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllString(s string, n int) []string {
|
||||
if n < 0 {
|
||||
n = len(s) + 1
|
||||
}
|
||||
result := make([]string, n)
|
||||
i := 0
|
||||
re.allMatches(s, nil, n, func(match []int) {
|
||||
result[i] = s[match[0]:match[1]]
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
|
||||
// slice of all successive matches of the expression, as defined by the 'All'
|
||||
// description in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
|
||||
if n < 0 {
|
||||
n = len(s) + 1
|
||||
}
|
||||
result := make([][]int, n)
|
||||
i := 0
|
||||
re.allMatches(s, nil, n, func(match []int) {
|
||||
result[i] = match[0:2]
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
|
||||
// of all successive matches of the expression, as defined by the 'All'
|
||||
// description in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
|
||||
if n < 0 {
|
||||
n = len(b) + 1
|
||||
}
|
||||
result := make([][][]byte, n)
|
||||
i := 0
|
||||
re.allMatches("", b, n, func(match []int) {
|
||||
slice := make([][]byte, len(match)/2)
|
||||
for j := range slice {
|
||||
if match[2*j] >= 0 {
|
||||
slice[j] = b[match[2*j]:match[2*j+1]]
|
||||
}
|
||||
}
|
||||
result[i] = slice
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
|
||||
// a slice of all successive matches of the expression, as defined by the
|
||||
// 'All' description in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
|
||||
if n < 0 {
|
||||
n = len(b) + 1
|
||||
}
|
||||
result := make([][]int, n)
|
||||
i := 0
|
||||
re.allMatches("", b, n, func(match []int) {
|
||||
result[i] = match
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
|
||||
// returns a slice of all successive matches of the expression, as defined by
|
||||
// the 'All' description in the package comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
|
||||
if n < 0 {
|
||||
n = len(s) + 1
|
||||
}
|
||||
result := make([][]string, n)
|
||||
i := 0
|
||||
re.allMatches(s, nil, n, func(match []int) {
|
||||
slice := make([]string, len(match)/2)
|
||||
for j := range slice {
|
||||
if match[2*j] >= 0 {
|
||||
slice[j] = s[match[2*j]:match[2*j+1]]
|
||||
}
|
||||
}
|
||||
result[i] = slice
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
||||
// FindAllStringSubmatchIndex is the 'All' version of
|
||||
// FindStringSubmatchIndex; it returns a slice of all successive matches of
|
||||
// the expression, as defined by the 'All' description in the package
|
||||
// comment.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
|
||||
if n < 0 {
|
||||
n = len(s) + 1
|
||||
}
|
||||
result := make([][]int, n)
|
||||
i := 0
|
||||
re.allMatches(s, nil, n, func(match []int) {
|
||||
result[i] = match
|
||||
i++
|
||||
})
|
||||
if i == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[0:i]
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue