exp/regexp: bug fixes and RE2 tests

Also add exp/regexp to build (forgot before). At this point I am very confident in exp/regexp's behavior. It should be usable as a drop-in replacement for regexp now. Later CLs could introduce a CompilePOSIX to get at traditional POSIX ``extended regular expressions'' as in egrep and also an re.MatchLongest method to change the matching mode to leftmost longest instead of leftmost first. On the other hand, I expect very few people to use either. R=r, r, gustavo CC=golang-dev https://golang.org/cl/4990041
2025-12-08 06:10:04 +00:00 · 2011-09-07 15:48:06 -04:00 · 2011-09-07 15:48:06 -04:00 · 08ae1a5a23
commit 08ae1a5a23
parent a2c2c87439
12 changed files with 474 additions and 58 deletions
--- a/src/pkg/exp/regexp/syntax/parse.go
+++ b/src/pkg/exp/regexp/syntax/parse.go
@ -419,8 +419,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
 		// used or marked for reuse, and the slice space has been reused
 		// for out (len(out) <= start).
 		//
-		// Invariant: sub[start:i] consists of regexps that all begin
-		// with str as modified by strflags.
+		// Invariant: sub[start:i] consists of regexps that all begin with ifirst.
 		var ifirst *Regexp
 		if i < len(sub) {
 			ifirst = p.leadingRegexp(sub[i])
@ -441,7 +440,6 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
 		} else {
 			// Construct factored form: prefix(suffix1|suffix2|...)
 			prefix := first
-
 			for j := start; j < i; j++ {
 				reuse := j != start // prefix came from sub[start] 
 				sub[j] = p.removeLeadingRegexp(sub[j], reuse)
@ -605,8 +603,10 @@ func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
 		}
 		return re
 	}
-	re.Op = OpEmptyMatch
-	return re
+	if reuse {
+		p.reuse(re)
+	}
+	return p.newRegexp(OpEmptyMatch)
 }

 func literalRegexp(s string, flags Flags) *Regexp {
@ -1053,18 +1053,18 @@ func mergeCharClass(dst, src *Regexp) {
 	case OpCharClass:
 		// src is simpler, so either literal or char class
 		if src.Op == OpLiteral {
-			dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0])
+			dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
 		} else {
 			dst.Rune = appendClass(dst.Rune, src.Rune)
 		}
 	case OpLiteral:
 		// both literal
-		if src.Rune[0] == dst.Rune[0] {
+		if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
 			break
 		}
 		dst.Op = OpCharClass
-		dst.Rune = append(dst.Rune, dst.Rune[0])
-		dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0])
+		dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
+		dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
 	}
 }

@ -1544,6 +1544,14 @@ func cleanClass(rp *[]int) []int {
 	return r[:w]
 }

+// appendLiteral returns the result of appending the literal x to the class r.
+func appendLiteral(r []int, x int, flags Flags) []int {
+	if flags&FoldCase != 0 {
+		return appendFoldedRange(r, x, x)
+	}
+	return appendRange(r, x, x)
+}
+
 // appendRange returns the result of appending the range lo-hi to the class r.
 func appendRange(r []int, lo, hi int) []int {
 	// Expand last range or next to last range if it overlaps or abuts.