go/src/pkg/exp/regexp/syntax/parse.go

// Copyright 2011 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

import (
	"os"
	"sort"
	"unicode"
	"utf8"
)

// An Error describes a failure to parse a regular expression
// and gives the offending expression.
type Error struct {
	Code ErrorCode
	Expr string
}

func (e *Error) String() string {
	return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
}

// An ErrorCode describes a failure to parse a regular expression.
type ErrorCode string

const (
	// Unexpected error
	ErrInternalError ErrorCode = "regexp/syntax: internal error"

	// Parse errors
	ErrInvalidCharClass      ErrorCode = "invalid character class"
	ErrInvalidCharRange      ErrorCode = "invalid character class range"
	ErrInvalidEscape         ErrorCode = "invalid escape sequence"
	ErrInvalidNamedCapture   ErrorCode = "invalid named capture"
	ErrInvalidPerlOp         ErrorCode = "invalid or unsupported Perl syntax"
	ErrInvalidRepeatOp       ErrorCode = "invalid nested repetition operator"
	ErrInvalidRepeatSize     ErrorCode = "invalid repeat count"
	ErrInvalidUTF8           ErrorCode = "invalid UTF-8"
	ErrMissingBracket        ErrorCode = "missing closing ]"
	ErrMissingParen          ErrorCode = "missing closing )"
	ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
	ErrTrailingBackslash     ErrorCode = "trailing backslash at end of expression"
)

func (e ErrorCode) String() string {
	return string(e)
}

// Flags control the behavior of the parser and record information about regexp context.
type Flags uint16

const (
	FoldCase      Flags = 1 << iota // case-insensitive match
	Literal                         // treat pattern as literal string
	ClassNL                         // allow character classes like [^a-z] and [[:space:]] to match newline
	DotNL                           // allow . to match newline
	OneLine                         // treat ^ and $ as only matching at beginning and end of text
	NonGreedy                       // make repetition operators default to non-greedy
	PerlX                           // allow Perl extensions
	UnicodeGroups                   // allow \p{Han}, \P{Han} for Unicode group and negation
	WasDollar                       // regexp OpEndText was $, not \z
	Simple                          // regexp contains no counted repetition

	MatchNL = ClassNL | DotNL

	Perl        = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible
	POSIX Flags = 0                                         // POSIX syntax
)

// Pseudo-ops for parsing stack.
const (
	opLeftParen = opPseudo + iota
	opVerticalBar
)

type parser struct {
	flags       Flags     // parse mode flags
	stack       []*Regexp // stack of parsed expressions
	numCap      int       // number of capturing groups seen
	wholeRegexp string
}

// Parse stack manipulation.

// push pushes the regexp re onto the parse stack and returns the regexp.
func (p *parser) push(re *Regexp) *Regexp {
	// TODO: automatic concatenation
	// TODO: turn character class into literal
	// TODO: compute simple

	p.stack = append(p.stack, re)
	return re
}

// newLiteral returns a new OpLiteral Regexp with the given flags
func newLiteral(r int, flags Flags) *Regexp {
	re := &Regexp{
		Op:    OpLiteral,
		Flags: flags,
	}
	re.Rune0[0] = r
	re.Rune = re.Rune0[:1]
	return re
}

// literal pushes a literal regexp for the rune r on the stack
// and returns that regexp.
func (p *parser) literal(r int) *Regexp {
	return p.push(newLiteral(r, p.flags))
}

// op pushes a regexp with the given op onto the stack
// and returns that regexp.
func (p *parser) op(op Op) *Regexp {
	return p.push(&Regexp{Op: op, Flags: p.flags})
}

// repeat replaces the top stack element with itself repeated
// according to op.
func (p *parser) repeat(op Op, opstr string) os.Error {
	n := len(p.stack)
	if n == 0 {
		return &Error{ErrMissingRepeatArgument, opstr}
	}
	sub := p.stack[n-1]
	re := &Regexp{
		Op: op,
	}
	re.Sub = re.Sub0[:1]
	re.Sub[0] = sub
	p.stack[n-1] = re
	return nil
}

// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp {
	// TODO: Flatten concats.

	// Scan down to find pseudo-operator | or (.
	i := len(p.stack)
	for i > 0 && p.stack[i-1].Op < opPseudo {
		i--
	}
	sub := p.stack[i:]
	p.stack = p.stack[:i]

	var re *Regexp
	switch len(sub) {
	case 0:
		re = &Regexp{Op: OpEmptyMatch}
	case 1:
		re = sub[0]
	default:
		re = &Regexp{Op: OpConcat}
		re.Sub = append(re.Sub0[:0], sub...)
	}
	return p.push(re)
}

// alternate replaces the top of the stack (above the topmost '(') with its alternation.
func (p *parser) alternate() *Regexp {
	// TODO: Flatten alternates.

	// Scan down to find pseudo-operator (.
	// There are no | above (.
	i := len(p.stack)
	for i > 0 && p.stack[i-1].Op < opPseudo {
		i--
	}
	sub := p.stack[i:]
	p.stack = p.stack[:i]

	var re *Regexp
	switch len(sub) {
	case 0:
		re = &Regexp{Op: OpNoMatch}
	case 1:
		re = sub[0]
	default:
		re = &Regexp{Op: OpAlternate}
		re.Sub = append(re.Sub0[:0], sub...)
	}
	return p.push(re)
}

// Parsing.

func Parse(s string, flags Flags) (*Regexp, os.Error) {
	if flags&Literal != 0 {
		// Trivial parser for literal string.
		if err := checkUTF8(s); err != nil {
			return nil, err
		}
		re := &Regexp{
			Op:    OpLiteral,
			Flags: flags,
		}
		re.Rune = re.Rune0[:0] // use local storage for small strings
		for _, c := range s {
			if len(re.Rune) >= cap(re.Rune) {
				// string is too long to fit in Rune0.  let Go handle it
				re.Rune = []int(s)
				break
			}
			re.Rune = append(re.Rune, c)
		}
		return re, nil
	}

	// Otherwise, must do real work.
	var (
		p   parser
		err os.Error
		c   int
		op  Op
	)
	p.flags = flags
	p.wholeRegexp = s
	t := s
	for t != "" {
		switch t[0] {
		default:
			if c, t, err = nextRune(t); err != nil {
				return nil, err
			}
			p.literal(c)

		case '(':
			// TODO: Actual Perl flag parsing.
			if len(t) >= 3 && t[1] == '?' && t[2] == ':' {
				// non-capturing paren
				p.op(opLeftParen)
				t = t[3:]
				break
			}
			p.numCap++
			p.op(opLeftParen).Cap = p.numCap
			t = t[1:]
		case '|':
			p.concat()
			if err = p.parseVerticalBar(); err != nil {
				return nil, err
			}
			t = t[1:]
		case ')':
			if err = p.parseRightParen(); err != nil {
				return nil, err
			}
			t = t[1:]
		case '^':
			if p.flags&OneLine != 0 {
				p.op(OpBeginText)
			} else {
				p.op(OpBeginLine)
			}
			t = t[1:]
		case '$':
			if p.flags&OneLine != 0 {
				p.op(OpEndText).Flags |= WasDollar
			} else {
				p.op(OpEndLine)
			}
			t = t[1:]
		case '.':
			if p.flags&DotNL != 0 {
				p.op(OpAnyChar)
			} else {
				p.op(OpAnyCharNotNL)
			}
			t = t[1:]
		case '[':
			if t, err = p.parseClass(t); err != nil {
				return nil, err
			}
		case '*', '+', '?':
			switch t[0] {
			case '*':
				op = OpStar
			case '+':
				op = OpPlus
			case '?':
				op = OpQuest
			}
			// TODO: greedy
			if err = p.repeat(op, t[0:1]); err != nil {
				return nil, err
			}
			t = t[1:]
		case '{':
			return nil, os.NewError("repeat not implemented")
		case '\\':
			return nil, os.NewError("escape not implemented")
		}
	}

	p.concat()
	if p.swapVerticalBar() {
		// pop vertical bar
		p.stack = p.stack[:len(p.stack)-1]
	}
	p.alternate()

	n := len(p.stack)
	if n != 1 {
		return nil, &Error{ErrMissingParen, s}
	}
	return p.stack[0], nil
}

// parseVerticalBar handles a | in the input.
func (p *parser) parseVerticalBar() os.Error {
	p.concat()

	// The concatenation we just parsed is on top of the stack.
	// If it sits above an opVerticalBar, swap it below
	// (things below an opVerticalBar become an alternation).
	// Otherwise, push a new vertical bar.
	if !p.swapVerticalBar() {
		p.op(opVerticalBar)
	}

	return nil
}

// If the top of the stack is an element followed by an opVerticalBar
// swapVerticalBar swaps the two and returns true.
// Otherwise it returns false.
func (p *parser) swapVerticalBar() bool {
	if n := len(p.stack); n >= 2 {
		re1 := p.stack[n-1]
		re2 := p.stack[n-2]
		if re2.Op == opVerticalBar {
			p.stack[n-2] = re1
			p.stack[n-1] = re2
			return true
		}
	}
	return false
}

// parseRightParen handles a ) in the input.
func (p *parser) parseRightParen() os.Error {
	p.concat()
	if p.swapVerticalBar() {
		// pop vertical bar
		p.stack = p.stack[:len(p.stack)-1]
	}
	p.alternate()

	n := len(p.stack)
	if n < 2 {
		return &Error{ErrInternalError, ""}
	}
	re1 := p.stack[n-1]
	re2 := p.stack[n-2]
	p.stack = p.stack[:n-2]
	if re2.Op != opLeftParen {
		return &Error{ErrMissingParen, p.wholeRegexp}
	}
	if re2.Cap == 0 {
		// Just for grouping.
		p.push(re1)
	} else {
		re2.Op = OpCapture
		re2.Sub = re2.Sub0[:1]
		re2.Sub[0] = re1
		p.push(re2)
	}
	return nil
}

// parseClassChar parses a character class character at the beginning of s
// and returns it.
func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) {
	if s == "" {
		return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
	}

	// TODO: Escapes

	return nextRune(s)
}

// parseClass parses a character class at the beginning of s
// and pushes it onto the parse stack.
func (p *parser) parseClass(s string) (rest string, err os.Error) {
	t := s[1:] // chop [
	re := &Regexp{Op: OpCharClass, Flags: p.flags}
	re.Rune = re.Rune0[:0]

	sign := +1
	if t != "" && t[0] == '^' {
		sign = -1
		t = t[1:]

		// If character class does not match \n, add it here,
		// so that negation later will do the right thing.
		if p.flags&ClassNL == 0 {
			re.Rune = append(re.Rune, '\n', '\n')
		}
	}

	class := re.Rune
	first := true // ] and - are okay as first char in class
	for t == "" || t[0] != ']' || first {
		// POSIX: - is only okay unescaped as first or last in class.
		// Perl: - is okay anywhere.
		if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
			_, size := utf8.DecodeRuneInString(t[1:])
			return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
		}
		first = false

		// TODO: Look for [:alnum:]
		// TODO: Look for Unicode group.
		// TODO: Look for Perl group.

		// Single character or simple range.
		rng := t
		var lo, hi int
		if lo, t, err = p.parseClassChar(t, s); err != nil {
			return "", err
		}
		hi = lo
		// [a-] means (a|-) so check for final ].
		if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
			t = t[1:]
			if hi, t, err = p.parseClassChar(t, s); err != nil {
				return "", err
			}
			if hi < lo {
				rng = rng[:len(rng)-len(t)]
				return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
			}
		}

		// Expand last range if overlaps or abuts.
		if n := len(class); n > 0 {
			clo, chi := class[n-2], class[n-1]
			if lo <= chi+1 && clo <= hi+1 {
				if lo < clo {
					class[n-2] = lo
				}
				if hi > chi {
					class[n-1] = hi
				}
				continue
			}
		}

		class = append(class, lo, hi)
	}
	t = t[1:] // chop ]

	// Use &re.Rune instead of &class to avoid allocation.
	re.Rune = class
	class = cleanClass(&re.Rune)
	if sign < 0 {
		class = negateClass(class)
	}
	re.Rune = class
	p.push(re)
	return t, nil
}

// cleanClass sorts the ranges (pairs of elements of r),
// merges them, and eliminates duplicates.
func cleanClass(rp *[]int) []int {
	// Sort by lo increasing, hi decreasing to break ties.
	sort.Sort(ranges{rp})

	r := *rp
	// Merge abutting, overlapping.
	w := 2 // write index
	for i := 2; i < len(r); i += 2 {
		lo, hi := r[i], r[i+1]
		if lo <= r[w-1]+1 {
			// merge with previous range
			if hi > r[w-1] {
				r[w-1] = hi
			}
			continue
		}
		// new disjoint range
		r[w] = lo
		r[w+1] = hi
		w += 2
	}

	return r[:w]
}

// negateClass overwrites r and returns r's negation.
// It assumes the class r is already clean.
func negateClass(r []int) []int {
	nextLo := 0 // lo end of next class to add
	w := 0      // write index
	for i := 0; i < len(r); i += 2 {
		lo, hi := r[i], r[i+1]
		if nextLo <= lo-1 {
			r[w] = nextLo
			r[w+1] = lo - 1
			w += 2
		}
		nextLo = hi + 1
	}
	if nextLo <= unicode.MaxRune {
		// It's possible for the negation to have one more
		// range - this one - than the original class, so use append.
		r = append(r[:w], nextLo, unicode.MaxRune)
	}
	return r
}

// ranges implements sort.Interface on a []rune.
// The choice of receiver type definition is strange
// but avoids an allocation since we already have
// a *[]int.
type ranges struct {
	p *[]int
}

func (ra ranges) Less(i, j int) bool {
	p := *ra.p
	i *= 2
	j *= 2
	return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
}

func (ra ranges) Len() int {
	return len(*ra.p) / 2
}

func (ra ranges) Swap(i, j int) {
	p := *ra.p
	i *= 2
	j *= 2
	p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
}


func checkUTF8(s string) os.Error {
	for s != "" {
		rune, size := utf8.DecodeRuneInString(s)
		if rune == utf8.RuneError && size == 1 {
			return &Error{Code: ErrInvalidUTF8, Expr: s}
		}
		s = s[size:]
	}
	return nil
}

func nextRune(s string) (c int, t string, err os.Error) {
	c, size := utf8.DecodeRuneInString(s)
	if c == utf8.RuneError && size == 1 {
		return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
	}
	return c, s[size:], nil
}
exp/regexp/syntax: syntax data structures, parser Parser is a work in progress but can populate most of the interesting parts of the data structure, so a good checkpoint. All the complicated Perl syntax is missing, as are various important optimizations made during parsing to the syntax tree. The plan is that exp/regexp's API will mimic regexp, and exp/regexp/syntax provides the parser directly for programs that need it (and for implementing exp/regexp). Once finished, exp/regexp will replace regexp. R=r, sam.thorogood, kevlar, edsrzf CC=golang-dev https://golang.org/cl/4538123 2011-06-13 09:20:23 -04:00			`// Copyright 2011 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`package syntax`

			`import (`
			`"os"`
			`"sort"`
			`"unicode"`
			`"utf8"`
			`)`

			`// An Error describes a failure to parse a regular expression`
			`// and gives the offending expression.`
			`type Error struct {`
			`Code ErrorCode`
			`Expr string`
			`}`

			`func (e *Error) String() string {`
			return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
			`}`

			`// An ErrorCode describes a failure to parse a regular expression.`
			`type ErrorCode string`

			`const (`
			`// Unexpected error`
			`ErrInternalError ErrorCode = "regexp/syntax: internal error"`

			`// Parse errors`
			`ErrInvalidCharClass ErrorCode = "invalid character class"`
			`ErrInvalidCharRange ErrorCode = "invalid character class range"`
			`ErrInvalidEscape ErrorCode = "invalid escape sequence"`
			`ErrInvalidNamedCapture ErrorCode = "invalid named capture"`
			`ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"`
			`ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"`
			`ErrInvalidRepeatSize ErrorCode = "invalid repeat count"`
			`ErrInvalidUTF8 ErrorCode = "invalid UTF-8"`
			`ErrMissingBracket ErrorCode = "missing closing ]"`
			`ErrMissingParen ErrorCode = "missing closing )"`
			`ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"`
			`ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"`
			`)`

			`func (e ErrorCode) String() string {`
			`return string(e)`
			`}`

			`// Flags control the behavior of the parser and record information about regexp context.`
			`type Flags uint16`

			`const (`
			`FoldCase Flags = 1 << iota // case-insensitive match`
			`Literal // treat pattern as literal string`
			`ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline`
			`DotNL // allow . to match newline`
			`OneLine // treat ^ and $ as only matching at beginning and end of text`
			`NonGreedy // make repetition operators default to non-greedy`
			`PerlX // allow Perl extensions`
			`UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation`
			`WasDollar // regexp OpEndText was $, not \z`
			`Simple // regexp contains no counted repetition`

			`MatchNL = ClassNL \| DotNL`

			`Perl = ClassNL \| OneLine \| PerlX \| UnicodeGroups // as close to Perl as possible`
			`POSIX Flags = 0 // POSIX syntax`
			`)`

			`// Pseudo-ops for parsing stack.`
			`const (`
			`opLeftParen = opPseudo + iota`
			`opVerticalBar`
			`)`

			`type parser struct {`
			`flags Flags // parse mode flags`
			`stack []*Regexp // stack of parsed expressions`
			`numCap int // number of capturing groups seen`
			`wholeRegexp string`
			`}`

			`// Parse stack manipulation.`

			`// push pushes the regexp re onto the parse stack and returns the regexp.`
			`func (p parser) push(re Regexp) *Regexp {`
			`// TODO: automatic concatenation`
			`// TODO: turn character class into literal`
			`// TODO: compute simple`

			`p.stack = append(p.stack, re)`
			`return re`
			`}`

			`// newLiteral returns a new OpLiteral Regexp with the given flags`
			`func newLiteral(r int, flags Flags) *Regexp {`
			`re := &Regexp{`
			`Op: OpLiteral,`
			`Flags: flags,`
			`}`
			`re.Rune0[0] = r`
			`re.Rune = re.Rune0[:1]`
			`return re`
			`}`

			`// literal pushes a literal regexp for the rune r on the stack`
			`// and returns that regexp.`
			`func (p parser) literal(r int) Regexp {`
			`return p.push(newLiteral(r, p.flags))`
			`}`

			`// op pushes a regexp with the given op onto the stack`
			`// and returns that regexp.`
			`func (p parser) op(op Op) Regexp {`
			`return p.push(&Regexp{Op: op, Flags: p.flags})`
			`}`

			`// repeat replaces the top stack element with itself repeated`
			`// according to op.`
			`func (p *parser) repeat(op Op, opstr string) os.Error {`
			`n := len(p.stack)`
			`if n == 0 {`
			`return &Error{ErrMissingRepeatArgument, opstr}`
			`}`
			`sub := p.stack[n-1]`
			`re := &Regexp{`
			`Op: op,`
			`}`
			`re.Sub = re.Sub0[:1]`
			`re.Sub[0] = sub`
			`p.stack[n-1] = re`
			`return nil`
			`}`

			`// concat replaces the top of the stack (above the topmost '\|' or '(') with its concatenation.`
			`func (p parser) concat() Regexp {`
			`// TODO: Flatten concats.`

			`// Scan down to find pseudo-operator \| or (.`
			`i := len(p.stack)`
			`for i > 0 && p.stack[i-1].Op < opPseudo {`
			`i--`
			`}`
			`sub := p.stack[i:]`
			`p.stack = p.stack[:i]`

			`var re *Regexp`
			`switch len(sub) {`
			`case 0:`
			`re = &Regexp{Op: OpEmptyMatch}`
			`case 1:`
			`re = sub[0]`
			`default:`
			`re = &Regexp{Op: OpConcat}`
			`re.Sub = append(re.Sub0[:0], sub...)`
			`}`
			`return p.push(re)`
			`}`

			`// alternate replaces the top of the stack (above the topmost '(') with its alternation.`
			`func (p parser) alternate() Regexp {`
			`// TODO: Flatten alternates.`

			`// Scan down to find pseudo-operator (.`
			`// There are no \| above (.`
			`i := len(p.stack)`
			`for i > 0 && p.stack[i-1].Op < opPseudo {`
			`i--`
			`}`
			`sub := p.stack[i:]`
			`p.stack = p.stack[:i]`

			`var re *Regexp`
			`switch len(sub) {`
			`case 0:`
			`re = &Regexp{Op: OpNoMatch}`
			`case 1:`
			`re = sub[0]`
			`default:`
			`re = &Regexp{Op: OpAlternate}`
			`re.Sub = append(re.Sub0[:0], sub...)`
			`}`
			`return p.push(re)`
			`}`

			`// Parsing.`

			`func Parse(s string, flags Flags) (*Regexp, os.Error) {`
			`if flags&Literal != 0 {`
			`// Trivial parser for literal string.`
			`if err := checkUTF8(s); err != nil {`
			`return nil, err`
			`}`
			`re := &Regexp{`
			`Op: OpLiteral,`
			`Flags: flags,`
			`}`
			`re.Rune = re.Rune0[:0] // use local storage for small strings`
			`for _, c := range s {`
			`if len(re.Rune) >= cap(re.Rune) {`
			`// string is too long to fit in Rune0. let Go handle it`
			`re.Rune = []int(s)`
			`break`
			`}`
			`re.Rune = append(re.Rune, c)`
			`}`
			`return re, nil`
			`}`

			`// Otherwise, must do real work.`
			`var (`
			`p parser`
			`err os.Error`
			`c int`
			`op Op`
			`)`
			`p.flags = flags`
			`p.wholeRegexp = s`
			`t := s`
			`for t != "" {`
			`switch t[0] {`
			`default:`
			`if c, t, err = nextRune(t); err != nil {`
			`return nil, err`
			`}`
			`p.literal(c)`

			`case '(':`
			`// TODO: Actual Perl flag parsing.`
			`if len(t) >= 3 && t[1] == '?' && t[2] == ':' {`
			`// non-capturing paren`
			`p.op(opLeftParen)`
			`t = t[3:]`
			`break`
			`}`
			`p.numCap++`
			`p.op(opLeftParen).Cap = p.numCap`
			`t = t[1:]`
			`case '\|':`
			`p.concat()`
			`if err = p.parseVerticalBar(); err != nil {`
			`return nil, err`
			`}`
			`t = t[1:]`
			`case ')':`
			`if err = p.parseRightParen(); err != nil {`
			`return nil, err`
			`}`
			`t = t[1:]`
			`case '^':`
			`if p.flags&OneLine != 0 {`
			`p.op(OpBeginText)`
			`} else {`
			`p.op(OpBeginLine)`
			`}`
			`t = t[1:]`
			`case '$':`
			`if p.flags&OneLine != 0 {`
			`p.op(OpEndText).Flags \|= WasDollar`
			`} else {`
			`p.op(OpEndLine)`
			`}`
			`t = t[1:]`
			`case '.':`
			`if p.flags&DotNL != 0 {`
			`p.op(OpAnyChar)`
			`} else {`
			`p.op(OpAnyCharNotNL)`
			`}`
			`t = t[1:]`
			`case '[':`
			`if t, err = p.parseClass(t); err != nil {`
			`return nil, err`
			`}`
			`case '*', '+', '?':`
			`switch t[0] {`
			`case '*':`
			`op = OpStar`
			`case '+':`
			`op = OpPlus`
			`case '?':`
			`op = OpQuest`
			`}`
			`// TODO: greedy`
			`if err = p.repeat(op, t[0:1]); err != nil {`
			`return nil, err`
			`}`
			`t = t[1:]`
			`case '{':`
			`return nil, os.NewError("repeat not implemented")`
			`case '\\':`
			`return nil, os.NewError("escape not implemented")`
			`}`
			`}`

			`p.concat()`
			`if p.swapVerticalBar() {`
			`// pop vertical bar`
			`p.stack = p.stack[:len(p.stack)-1]`
			`}`
			`p.alternate()`

			`n := len(p.stack)`
			`if n != 1 {`
			`return nil, &Error{ErrMissingParen, s}`
			`}`
			`return p.stack[0], nil`
			`}`

			`// parseVerticalBar handles a \| in the input.`
			`func (p *parser) parseVerticalBar() os.Error {`
			`p.concat()`

			`// The concatenation we just parsed is on top of the stack.`
			`// If it sits above an opVerticalBar, swap it below`
			`// (things below an opVerticalBar become an alternation).`
			`// Otherwise, push a new vertical bar.`
			`if !p.swapVerticalBar() {`
			`p.op(opVerticalBar)`
			`}`

			`return nil`
			`}`

			`// If the top of the stack is an element followed by an opVerticalBar`
			`// swapVerticalBar swaps the two and returns true.`
			`// Otherwise it returns false.`
			`func (p *parser) swapVerticalBar() bool {`
			`if n := len(p.stack); n >= 2 {`
			`re1 := p.stack[n-1]`
			`re2 := p.stack[n-2]`
			`if re2.Op == opVerticalBar {`
			`p.stack[n-2] = re1`
			`p.stack[n-1] = re2`
			`return true`
			`}`
			`}`
			`return false`
			`}`

			`// parseRightParen handles a ) in the input.`
			`func (p *parser) parseRightParen() os.Error {`
			`p.concat()`
			`if p.swapVerticalBar() {`
			`// pop vertical bar`
			`p.stack = p.stack[:len(p.stack)-1]`
			`}`
			`p.alternate()`

			`n := len(p.stack)`
			`if n < 2 {`
			`return &Error{ErrInternalError, ""}`
			`}`
			`re1 := p.stack[n-1]`
			`re2 := p.stack[n-2]`
			`p.stack = p.stack[:n-2]`
			`if re2.Op != opLeftParen {`
			`return &Error{ErrMissingParen, p.wholeRegexp}`
			`}`
			`if re2.Cap == 0 {`
			`// Just for grouping.`
			`p.push(re1)`
			`} else {`
			`re2.Op = OpCapture`
			`re2.Sub = re2.Sub0[:1]`
			`re2.Sub[0] = re1`
			`p.push(re2)`
			`}`
			`return nil`
			`}`

			`// parseClassChar parses a character class character at the beginning of s`
			`// and returns it.`
			`func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) {`
			`if s == "" {`
			`return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}`
			`}`

			`// TODO: Escapes`

			`return nextRune(s)`
			`}`

			`// parseClass parses a character class at the beginning of s`
			`// and pushes it onto the parse stack.`
			`func (p *parser) parseClass(s string) (rest string, err os.Error) {`
			`t := s[1:] // chop [`
			`re := &Regexp{Op: OpCharClass, Flags: p.flags}`
			`re.Rune = re.Rune0[:0]`

			`sign := +1`
			`if t != "" && t[0] == '^' {`
			`sign = -1`
			`t = t[1:]`

			`// If character class does not match \n, add it here,`
			`// so that negation later will do the right thing.`
			`if p.flags&ClassNL == 0 {`
			`re.Rune = append(re.Rune, '\n', '\n')`
			`}`
			`}`

			`class := re.Rune`
			`first := true // ] and - are okay as first char in class`
			`for t == "" \|\| t[0] != ']' \|\| first {`
			`// POSIX: - is only okay unescaped as first or last in class.`
			`// Perl: - is okay anywhere.`
			`if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 \|\| t[1] != ']') {`
			`_, size := utf8.DecodeRuneInString(t[1:])`
			`return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}`
			`}`
			`first = false`

			`// TODO: Look for [:alnum:]`
			`// TODO: Look for Unicode group.`
			`// TODO: Look for Perl group.`

			`// Single character or simple range.`
			`rng := t`
			`var lo, hi int`
			`if lo, t, err = p.parseClassChar(t, s); err != nil {`
			`return "", err`
			`}`
			`hi = lo`
			`// [a-] means (a\|-) so check for final ].`
			`if len(t) >= 2 && t[0] == '-' && t[1] != ']' {`
			`t = t[1:]`
			`if hi, t, err = p.parseClassChar(t, s); err != nil {`
			`return "", err`
			`}`
			`if hi < lo {`
			`rng = rng[:len(rng)-len(t)]`
			`return "", &Error{Code: ErrInvalidCharRange, Expr: rng}`
			`}`
			`}`

			`// Expand last range if overlaps or abuts.`
			`if n := len(class); n > 0 {`
			`clo, chi := class[n-2], class[n-1]`
			`if lo <= chi+1 && clo <= hi+1 {`
			`if lo < clo {`
			`class[n-2] = lo`
			`}`
			`if hi > chi {`
			`class[n-1] = hi`
			`}`
			`continue`
			`}`
			`}`

			`class = append(class, lo, hi)`
			`}`
			`t = t[1:] // chop ]`

			`// Use &re.Rune instead of &class to avoid allocation.`
			`re.Rune = class`
			`class = cleanClass(&re.Rune)`
			`if sign < 0 {`
			`class = negateClass(class)`
			`}`
			`re.Rune = class`
			`p.push(re)`
			`return t, nil`
			`}`

			`// cleanClass sorts the ranges (pairs of elements of r),`
			`// merges them, and eliminates duplicates.`
			`func cleanClass(rp *[]int) []int {`
			`// Sort by lo increasing, hi decreasing to break ties.`
			`sort.Sort(ranges{rp})`

			`r := *rp`
			`// Merge abutting, overlapping.`
			`w := 2 // write index`
			`for i := 2; i < len(r); i += 2 {`
			`lo, hi := r[i], r[i+1]`
			`if lo <= r[w-1]+1 {`
			`// merge with previous range`
			`if hi > r[w-1] {`
			`r[w-1] = hi`
			`}`
			`continue`
			`}`
			`// new disjoint range`
			`r[w] = lo`
			`r[w+1] = hi`
			`w += 2`
			`}`

			`return r[:w]`
			`}`

			`// negateClass overwrites r and returns r's negation.`
			`// It assumes the class r is already clean.`
			`func negateClass(r []int) []int {`
			`nextLo := 0 // lo end of next class to add`
			`w := 0 // write index`
			`for i := 0; i < len(r); i += 2 {`
			`lo, hi := r[i], r[i+1]`
			`if nextLo <= lo-1 {`
			`r[w] = nextLo`
			`r[w+1] = lo - 1`
			`w += 2`
			`}`
			`nextLo = hi + 1`
			`}`
			`if nextLo <= unicode.MaxRune {`
			`// It's possible for the negation to have one more`
			`// range - this one - than the original class, so use append.`
			`r = append(r[:w], nextLo, unicode.MaxRune)`
			`}`
			`return r`
			`}`

			`// ranges implements sort.Interface on a []rune.`
			`// The choice of receiver type definition is strange`
			`// but avoids an allocation since we already have`
			`// a *[]int.`
			`type ranges struct {`
			`p *[]int`
			`}`

			`func (ra ranges) Less(i, j int) bool {`
			`p := *ra.p`
			`i *= 2`
			`j *= 2`
			`return p[i] < p[j] \|\| p[i] == p[j] && p[i+1] > p[j+1]`
			`}`

			`func (ra ranges) Len() int {`
			`return len(*ra.p) / 2`
			`}`

			`func (ra ranges) Swap(i, j int) {`
			`p := *ra.p`
			`i *= 2`
			`j *= 2`
			`p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]`
			`}`


			`func checkUTF8(s string) os.Error {`
			`for s != "" {`
			`rune, size := utf8.DecodeRuneInString(s)`
			`if rune == utf8.RuneError && size == 1 {`
			`return &Error{Code: ErrInvalidUTF8, Expr: s}`
			`}`
			`s = s[size:]`
			`}`
			`return nil`
			`}`

			`func nextRune(s string) (c int, t string, err os.Error) {`
			`c, size := utf8.DecodeRuneInString(s)`
			`if c == utf8.RuneError && size == 1 {`
			`return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}`
			`}`
			`return c, s[size:], nil`
			`}`