| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | # | 
					
						
							|  |  |  | # Secret Labs' Regular Expression Engine | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # re-compatible interface for the sre matching engine | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2000-08-01 18:20:07 +00:00
										 |  |  | # This version of the SRE library can be redistributed under CNRI's | 
					
						
							|  |  |  | # Python 1.6 license.  For any other use, please contact Secret Labs | 
					
						
							|  |  |  | # AB (info@pythonware.com). | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | # Portions of this engine have been developed in cooperation with | 
					
						
							| 
									
										
										
										
											2000-08-01 18:20:07 +00:00
										 |  |  | # CNRI.  Hewlett-Packard provided funding for 1.6 integration and | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | # other compatibility work. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 |  |  | # FIXME: change all FIXME's to XXX ;-) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | import sre_compile | 
					
						
							| 
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 |  |  | import sre_parse | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 |  |  | import string | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | # flags | 
					
						
							|  |  |  | I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE | 
					
						
							|  |  |  | L = LOCALE = sre_compile.SRE_FLAG_LOCALE | 
					
						
							|  |  |  | M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE | 
					
						
							|  |  |  | S = DOTALL = sre_compile.SRE_FLAG_DOTALL | 
					
						
							|  |  |  | X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-08-01 18:20:07 +00:00
										 |  |  | # sre extensions (may or may not be in 1.6/2.0 final) | 
					
						
							| 
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 |  |  | T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE | 
					
						
							|  |  |  | U = UNICODE = sre_compile.SRE_FLAG_UNICODE | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # sre exception | 
					
						
							| 
									
										
										
										
											2000-06-29 16:57:40 +00:00
										 |  |  | error = sre_compile.error | 
					
						
							| 
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | # -------------------------------------------------------------------- | 
					
						
							|  |  |  | # public interface | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | # FIXME: add docstrings | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def match(pattern, string, flags=0): | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     return _compile(pattern, flags).match(string) | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def search(pattern, string, flags=0): | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     return _compile(pattern, flags).search(string) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def sub(pattern, repl, string, count=0): | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 |  |  |     return _compile(pattern, 0).sub(repl, string, count) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def subn(pattern, repl, string, count=0): | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 |  |  |     return _compile(pattern, 0).subn(repl, string, count) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def split(pattern, string, maxsplit=0): | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 |  |  |     return _compile(pattern, 0).split(string, maxsplit) | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | def findall(pattern, string, maxsplit=0): | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 |  |  |     return _compile(pattern, 0).findall(string, maxsplit) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def compile(pattern, flags=0): | 
					
						
							|  |  |  |     return _compile(pattern, flags) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-07-23 21:46:17 +00:00
def purge():
    """Empty the module-level cache of compiled patterns."""
    _cache.clear()
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 |  |  | def template(pattern, flags=0): | 
					
						
							|  |  |  |     return _compile(pattern, flags|T) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | def escape(pattern): | 
					
						
							|  |  |  |     s = list(pattern) | 
					
						
							|  |  |  |     for i in range(len(pattern)): | 
					
						
							|  |  |  |         c = pattern[i] | 
					
						
							|  |  |  |         if not ("a" <= c <= "z" or "A" <= c <= "Z" or "0" <= c <= "9"): | 
					
						
							|  |  |  |             if c == "\000": | 
					
						
							|  |  |  |                 s[i] = "\\000" | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 s[i] = "\\" + c | 
					
						
							| 
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 |  |  |     return _join(s, pattern) | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # -------------------------------------------------------------------- | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | # internals | 
					
						
							|  |  |  | 
 | 
					
						
# compiled patterns, stored and looked up by _compile() under its
# argument tuple; emptied by purge()
_cache = {}
# once the cache holds this many entries, _compile() clears it before
# storing another one
_MAXCACHE = 100
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 |  |  | def _join(seq, sep): | 
					
						
							|  |  |  |     # internal: join into string having the same type as sep | 
					
						
							|  |  |  |     return string.join(seq, sep[:0]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 |  |  | def _compile(*key): | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     # internal: compile pattern | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
										 |  |  |     p = _cache.get(key) | 
					
						
							|  |  |  |     if p is not None: | 
					
						
							|  |  |  |         return p | 
					
						
							|  |  |  |     pattern, flags = key | 
					
						
							|  |  |  |     if type(pattern) not in sre_compile.STRING_TYPES: | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |         return pattern | 
					
						
							| 
									
										
										
										
											2000-08-01 22:47:49 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         p = sre_compile.compile(pattern, flags) | 
					
						
							|  |  |  |     except error, v: | 
					
						
							|  |  |  |         raise error, v # invalid expression | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     if len(_cache) >= _MAXCACHE: | 
					
						
							|  |  |  |         _cache.clear() | 
					
						
							|  |  |  |     _cache[key] = p | 
					
						
							|  |  |  |     return p | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _sub(pattern, template, string, count=0): | 
					
						
							|  |  |  |     # internal: pattern.sub implementation hook | 
					
						
							|  |  |  |     return _subn(pattern, template, string, count)[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _subn(pattern, template, string, count=0): | 
					
						
							|  |  |  |     # internal: pattern.subn implementation hook | 
					
						
							|  |  |  |     if callable(template): | 
					
						
							| 
									
										
										
										
											2000-06-18 20:27:10 +00:00
										 |  |  |         filter = template | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											2000-06-30 07:50:59 +00:00
										 |  |  |         template = sre_parse.parse_template(template, pattern) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |         def filter(match, template=template): | 
					
						
							| 
									
										
										
										
											2000-06-29 08:58:44 +00:00
										 |  |  |             return sre_parse.expand_template(template, match) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     n = i = 0 | 
					
						
							|  |  |  |     s = [] | 
					
						
							|  |  |  |     append = s.append | 
					
						
							| 
									
										
										
										
											2000-06-29 16:57:40 +00:00
										 |  |  |     c = pattern.scanner(string) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     while not count or n < count: | 
					
						
							|  |  |  |         m = c.search() | 
					
						
							|  |  |  |         if not m: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2000-06-30 07:50:59 +00:00
										 |  |  |         b, e = m.span() | 
					
						
							| 
									
										
										
										
											2000-06-30 00:27:46 +00:00
										 |  |  |         if i < b: | 
					
						
							|  |  |  |             append(string[i:b]) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |         append(filter(m)) | 
					
						
							| 
									
										
										
										
											2000-06-30 07:50:59 +00:00
										 |  |  |         i = e | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |         n = n + 1 | 
					
						
							| 
									
										
										
										
											2000-06-30 00:27:46 +00:00
										 |  |  |     append(string[i:]) | 
					
						
							| 
									
										
										
										
											2000-07-23 21:46:17 +00:00
										 |  |  |     return _join(s, string[:0]), n | 
					
						
							| 
									
										
										
										
											2000-03-31 14:58:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  | def _split(pattern, string, maxsplit=0): | 
					
						
							|  |  |  |     # internal: pattern.split implementation hook | 
					
						
							|  |  |  |     n = i = 0 | 
					
						
							|  |  |  |     s = [] | 
					
						
							|  |  |  |     append = s.append | 
					
						
							| 
									
										
										
										
											2000-06-29 16:57:40 +00:00
										 |  |  |     extend = s.extend | 
					
						
							|  |  |  |     c = pattern.scanner(string) | 
					
						
							| 
									
										
										
										
											2000-06-30 00:27:46 +00:00
										 |  |  |     g = pattern.groups | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     while not maxsplit or n < maxsplit: | 
					
						
							|  |  |  |         m = c.search() | 
					
						
							|  |  |  |         if not m: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2000-06-30 07:50:59 +00:00
										 |  |  |         b, e = m.span() | 
					
						
							|  |  |  |         if b == e: | 
					
						
							|  |  |  |             if i >= len(string): | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2000-06-29 16:57:40 +00:00
										 |  |  |         append(string[i:b]) | 
					
						
							| 
									
										
										
										
											2000-06-30 07:50:59 +00:00
										 |  |  |         if g and b != e: | 
					
						
							|  |  |  |             extend(m.groups()) | 
					
						
							|  |  |  |         i = e | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |         n = n + 1 | 
					
						
							| 
									
										
										
										
											2000-06-29 18:03:25 +00:00
										 |  |  |     append(string[i:]) | 
					
						
							| 
									
										
										
										
											2000-06-01 17:39:12 +00:00
										 |  |  |     return s | 
					
						
							| 
									
										
										
										
											2000-06-30 13:55:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # register myself for pickling | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import copy_reg | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _pickle(p): | 
					
						
							|  |  |  |     return _compile, (p.pattern, p.flags) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-08-07 20:59:04 +00:00
# compiling an empty pattern is only a way to get hold of the compiled
# pattern type; that type is then registered with copy_reg, with
# _pickle as the reduction function and _compile as the third
# (constructor) argument
copy_reg.pickle(type(_compile("", 0)), _pickle, _compile)
					
						
							| 
									
										
										
										
											2000-07-02 17:33:27 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # -------------------------------------------------------------------- | 
					
						
							|  |  |  | # experimental stuff (see python-dev discussions for details) | 
					
						
							|  |  |  | 
 | 
					
						
class Scanner:
    # experimental: combines a lexicon of (phrase, action) pairs into a
    # single compiled pattern and uses it to walk through a string
    def __init__(self, lexicon):
        # lexicon is iterated as (phrase, action) pairs and is also
        # indexed by position in scan(), so it must support both
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        p = []
        s = sre_parse.Pattern()
        for phrase, action in lexicon:
            # each parsed phrase is wrapped in its own subpattern whose
            # number is its position in the lexicon (len(p) is taken
            # before the append)
            p.append(sre_parse.SubPattern(s, [
                (SUBPATTERN, (len(p), sre_parse.parse(phrase))),
                ]))
        # the phrases become the alternatives of one branch
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        # NOTE(review): p was rebound on the previous line, so len(p) is
        # now taken from the combined SubPattern rather than from the
        # list of phrases -- confirm this is the intended group count
        s.groups = len(p)
        self.scanner = sre_compile.compile(p)
    def scan(self, string):
        # returns (results, remainder): the list of values collected
        # from the actions, and the part of string that was not scanned
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            # try to match the compound pattern at the current position
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # empty match: stop, since the position would not advance
                break
            # pick the action of the lexicon entry selected by lastindex
            action = self.lexicon[m.lastindex][1]
            if callable(action):
                # note: this stores the compiled pattern's bound match
                # method (not the current match object) on the scanner
                # before the action runs
                self.match = match
                # the action receives the scanner and the matched text;
                # its return value replaces the action
                action = action(self, m.group())
            if action is not None:
                # a None action (or a callable returning None) adds
                # nothing to the result
                append(action)
            i = j
        return result, string[i:]