2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Secret Labs' Regular Expression Engine
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# re-compatible interface for the sre matching engine
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-01 18:20:07 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# This version of the SRE library can be redistributed under CNRI's
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Python 1.6 license.  For any other use, please contact Secret Labs
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# AB (info@pythonware.com).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Portions of this engine have been developed in cooperation with
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-01 18:20:07 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# other compatibility work.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:20:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								r"""Support for regular expressions (RE).
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This module provides regular expression matching operations similar to
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 21:48:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								those found in Perl.  It supports both 8-bit and Unicode strings; both
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								the pattern and the strings being processed can contain null bytes and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								characters outside the US ASCII range.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Regular expressions can contain both special and ordinary characters.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Most ordinary characters, like "A", "a", or "0", are the simplest
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								regular expressions; they simply match themselves.  You can
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								concatenate ordinary characters, so last matches the string 'last'.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								The special characters are:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "."      Matches any character except a newline.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "^"      Matches the start of the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "$"      Matches the end of the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             Greedy means that it will match as many repetitions as possible.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "?"      Matches 0 or 1 (greedy) of the preceding RE.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    *?,+?,?? Non-greedy versions of the previous three special characters.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {m,n}    Matches from m to n repetitions of the preceding RE.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {m,n}?   Non-greedy version of the above.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "\\"      Either escapes special characters or signals a special sequence.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    []       Indicates a set of characters.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             A "^" as the first character indicates a complementing set.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "|"      A|B, creates an RE that will match either A or B.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (...)    Matches the RE inside the parentheses.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             The contents can be retrieved or matched later in the string.
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 21:48:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    (?:...)  Non-grouping version of regular parentheses.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (?P<name>...) The substring matched by the group is accessible by name.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (?P=name)     Matches the text matched earlier by the group named name.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (?#...)  A comment; ignored.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (?=...)  Matches if ... matches next, but doesn't consume the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (?!...)  Matches if ... doesn't match next.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								The special sequences consist of "\\" and a character from the list
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 21:48:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								below.  If the ordinary character is not on the list, then the
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								resulting RE will match the second character.
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:20:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    \number  Matches the contents of the group of the same number.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \A       Matches only at the start of the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \Z       Matches only at the end of the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \b       Matches the empty string, but only at the start or end of a word.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \B       Matches the empty string, but not at the start or end of a word.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \d       Matches any decimal digit; equivalent to the set [0-9].
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \D       Matches any non-digit character; equivalent to the set [^0-9].
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v].
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \S       Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								             With LOCALE, it will match the set [0-9_] plus characters defined
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             as letters for the current locale.
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:20:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    \W       Matches the complement of \w.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    \\       Matches a literal backslash.
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This module exports the following functions:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    match    Match a regular expression pattern to the beginning of a string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    search   Search a string for the presence of a pattern.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sub      Substitute occurrences of a pattern found in a string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    subn     Same as sub, but also return the number of substitutions made.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    split    Split a string by the occurrences of a pattern.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    findall  Find all occurrences of a pattern in a string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    compile  Compile a pattern into a RegexObject.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    purge    Clear the regular expression cache.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    escape   Backslash all non-alphanumerics in a string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Some of the functions in this module takes flags as optional parameters:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    I  IGNORECASE  Perform case-insensitive matching.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    M  MULTILINE   "^" matches the beginning of lines as well as the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                   "$" matches the end of lines as well as the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    S  DOTALL      "." matches any character at all, including the newline.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-18 18:47:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    U  UNICODE     Make \w, \W, \b, \B, dependent on the Unicode locale.
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-04 19:10:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This module also defines an exception 'error'.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-18 18:47:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-24 22:16:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import sys
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import sre_compile
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import sre_parse
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-02-18 12:05:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# public symbols
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__all__ = [ "match", "search", "sub", "subn", "split", "findall",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "UNICODE", "error" ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 16:47:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__version__ = "2.2.1"
							 | 
						
					
						
							
								
									
										
										
										
											2001-03-22 15:50:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-02-18 12:05:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# this module works under 1.5.2 and later.  don't use string methods
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import string
							 | 
						
					
						
							
								
									
										
										
										
											2001-02-15 22:15:14 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# flags
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# sre extensions (experimental, don't rely on these)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# sre exception
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-29 16:57:40 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								error = sre_compile.error
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# --------------------------------------------------------------------
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# public interface
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def match(pattern, string, flags=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Try to apply the pattern at the start of the string, returning
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    a match object, or None if no match was found."""
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, flags).match(string)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def search(pattern, string, flags=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Scan through string looking for a match to the pattern, returning
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    a match object, or None if no match was found."""
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, flags).search(string)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def sub(pattern, repl, string, count=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Return the string obtained by replacing the leftmost
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    non-overlapping occurrences of the pattern in string by the
							 | 
						
					
						
							
								
									
										
										
										
											2004-08-07 17:41:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    replacement repl.  repl can be either a string or a callable;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if a callable, it's passed the match object and must return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    a replacement string to be used."""
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, 0).sub(repl, string, count)
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def subn(pattern, repl, string, count=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Return a 2-tuple containing (new_string, number).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    new_string is the string obtained by replacing the leftmost
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    non-overlapping occurrences of the pattern in the source
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    string by the replacement repl.  number is the number of
							 | 
						
					
						
							
								
									
										
										
										
											2004-08-07 17:41:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    substitutions that were made. repl can be either a string or a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    callable; if a callable, it's passed the match object and must
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return a replacement string to be used."""
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, 0).subn(repl, string, count)
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def split(pattern, string, maxsplit=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Split the source string by the occurrences of the pattern,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    returning a list containing the resulting substrings."""
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, 0).split(string, maxsplit)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-07-06 20:56:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def findall(pattern, string):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Return a list of all non-overlapping matches in the string.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    If one or more groups are present in the pattern, return a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    list of groups; this will be a list of tuples if the pattern
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    has more than one group.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Empty matches are included in the result."""
							 | 
						
					
						
							
								
									
										
										
										
											2001-07-06 20:56:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, 0).findall(string)
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-24 22:16:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								if sys.hexversion >= 0x02020000:
							 | 
						
					
						
							
								
									
										
										
										
											2002-10-14 12:22:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    __all__.append("finditer")
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-24 22:16:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def finditer(pattern, string):
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-28 20:15:40 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        """Return an iterator over all non-overlapping matches in the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        string.  For each match, the iterator returns a match object.
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-24 22:16:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Empty matches are included in the result."""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return _compile(pattern, 0).finditer(string)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def compile(pattern, flags=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    "Compile a regular expression pattern, returning a pattern object."
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, flags)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def purge():
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    "Clear the regular expression cache"
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _cache.clear()
							 | 
						
					
						
							
								
									
										
										
										
											2001-03-22 15:50:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _cache_repl.clear()
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def template(pattern, flags=0):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    "Compile a template pattern, returning a pattern object"
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _compile(pattern, flags|T)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def escape(pattern):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    "Escape all non-alphanumeric characters in pattern."
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    s = list(pattern)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for i in range(len(pattern)):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        c = pattern[i]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not ("a" <= c <= "z" or "A" <= c <= "Z" or "0" <= c <= "9"):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if c == "\000":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                s[i] = "\\000"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                s[i] = "\\" + c
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _join(s, pattern)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# --------------------------------------------------------------------
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# internals
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								_cache = {}
							 | 
						
					
						
							
								
									
										
										
										
											2001-03-22 15:50:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								_cache_repl = {}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-18 19:30:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								_pattern_type = type(sre_compile.compile("", 0))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								_MAXCACHE = 100
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _join(seq, sep):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # internal: join into string having the same type as sep
							 | 
						
					
						
							
								
									
										
										
										
											2001-02-18 12:05:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return string.join(seq, sep[:0])
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _compile(*key):
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # internal: compile pattern
							 | 
						
					
						
							
								
									
										
										
										
											2004-04-20 21:11:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    cachekey = (type(key[0]),) + key
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    p = _cache.get(cachekey)
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if p is not None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return p
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    pattern, flags = key
							 | 
						
					
						
							
								
									
										
										
										
											2003-07-02 20:03:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if isinstance(pattern, _pattern_type):
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return pattern
							 | 
						
					
						
							
								
									
										
										
										
											2003-07-02 21:37:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not sre_compile.isstring(pattern):
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-18 19:30:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        raise TypeError, "first argument must be string or compiled pattern"
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-01 22:47:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        p = sre_compile.compile(pattern, flags)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    except error, v:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise error, v # invalid expression
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if len(_cache) >= _MAXCACHE:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        _cache.clear()
							 | 
						
					
						
							
								
									
										
										
										
											2004-04-20 21:11:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _cache[cachekey] = p
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return p
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-03-22 15:50:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _compile_repl(*key):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # internal: compile replacement pattern
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    p = _cache_repl.get(key)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if p is not None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return p
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    repl, pattern = key
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        p = sre_parse.parse_template(repl, pattern)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    except error, v:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise error, v # invalid expression
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if len(_cache_repl) >= _MAXCACHE:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        _cache_repl.clear()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    _cache_repl[key] = p
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return p
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-09-21 17:03:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _expand(pattern, match, template):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # internal: match.expand implementation hook
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    template = sre_parse.parse_template(template, pattern)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return sre_parse.expand_template(template, match)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 16:47:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _subx(pattern, template):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # internal: pattern.sub/subn implementation helper
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 21:48:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    template = _compile_repl(template, pattern)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not template[0] and len(template[1]) == 1:
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 16:47:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # literal replacement
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 21:48:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return template[1][0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def filter(match, template=template):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return sre_parse.expand_template(template, match)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return filter
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 13:55:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# register myself for pickling
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import copy_reg
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def _pickle(p):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return _compile, (p.pattern, p.flags)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-18 19:30:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								copy_reg.pickle(_pattern_type, _pickle, _compile)
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# --------------------------------------------------------------------
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# experimental stuff (see python-dev discussions for details)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class Scanner:
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 18:04:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, lexicon, flags=0):
							 | 
						
					
						
							
								
									
										
										
										
											2000-08-01 18:20:07 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        from sre_constants import BRANCH, SUBPATTERN
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.lexicon = lexicon
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # combine phrases into a compound pattern
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        p = []
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        s = sre_parse.Pattern()
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 18:04:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        s.flags = flags
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for phrase, action in lexicon:
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            p.append(sre_parse.SubPattern(s, [
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 18:04:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                ]))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        s.groups = len(p)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.scanner = sre_compile.compile(p)
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def scan(self, string):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result = []
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        append = result.append
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 18:04:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        match = self.scanner.scanner(string).match
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        i = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        while 1:
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 18:04:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            m = match()
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if not m:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                break
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            j = m.end()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if i == j:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                break
							 | 
						
					
						
							
								
									
										
										
										
											2001-10-21 18:04:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            action = self.lexicon[m.lastindex-1][1]
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if callable(action):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 15:06:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                self.match = m
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                action = action(self, m.group())
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if action is not None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                append(action)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            i = j
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return result, string[i:]
							 |