| 
									
										
										
										
											2000-02-04 15:28:42 +00:00
										 |  |  | """A parser for SGML, using the derived class as a static DTD.""" | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # XXX This only supports those SGML features used by HTML. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # XXX There should be a way to distinguish between PCDATA (parsed | 
					
						
							|  |  |  | # character data -- the normal case), RCDATA (replaceable character | 
					
						
							|  |  |  | # data -- only char and entity references and end tags are special) | 
					
						
							| 
									
										
										
										
											2001-07-16 18:30:35 +00:00
										 |  |  | # and CDATA (character data -- only end tags are special).  RCDATA is | 
					
						
							|  |  |  | # not supported at all. | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  | import markupbase | 
					
						
							| 
									
										
										
										
											1997-10-23 19:09:21 +00:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-09-09 01:49:58 +00:00
										 |  |  | __all__ = ["SGMLParser", "SGMLParseError"] | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Regular expressions used for parsing | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1997-10-23 19:09:21 +00:00
										 |  |  | interesting = re.compile('[&<]') | 
					
						
							|  |  |  | incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                            '<([a-zA-Z][^<>]*|' | 
					
						
							|  |  |  |                               '/([a-zA-Z][^<>]*)?|' | 
					
						
							|  |  |  |                               '![^<>]*)?') | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-05-28 22:48:53 +00:00
										 |  |  | entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') | 
					
						
							| 
									
										
										
										
											1997-10-23 19:09:21 +00:00
										 |  |  | charref = re.compile('&#([0-9]+)[^0-9]') | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1997-10-23 19:09:21 +00:00
										 |  |  | starttagopen = re.compile('<[>a-zA-Z]') | 
					
						
							| 
									
										
										
										
											1998-08-24 20:59:13 +00:00
										 |  |  | shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') | 
					
						
							|  |  |  | shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') | 
					
						
							| 
									
										
										
										
											1998-05-28 22:48:53 +00:00
										 |  |  | piclose = re.compile('>') | 
					
						
							| 
									
										
										
										
											2006-06-29 00:51:53 +00:00
										 |  |  | starttag = re.compile(r'<[a-zA-Z][-_.:a-zA-Z0-9]*\s*(' | 
					
						
							|  |  |  |         r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' | 
					
						
							|  |  |  |         r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]' | 
					
						
							|  |  |  |         r'[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*(?=[\s>/<])))?' | 
					
						
							|  |  |  |     r')*\s*/?\s*(?=[<>])') | 
					
						
							|  |  |  | endtag = re.compile(r'</?[a-zA-Z][-_.:a-zA-Z0-9]*\s*/?\s*(?=[<>])') | 
					
						
							| 
									
										
										
										
											2001-07-05 18:21:57 +00:00
										 |  |  | tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') | 
					
						
							| 
									
										
										
										
											1997-10-23 19:09:21 +00:00
										 |  |  | attrfind = re.compile( | 
					
						
							| 
									
										
										
										
											2001-07-14 05:50:33 +00:00
										 |  |  |     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' | 
					
						
							| 
									
										
										
										
											2006-06-23 06:03:45 +00:00
										 |  |  |     r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-03-16 20:04:57 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | class SGMLParseError(RuntimeError): | 
					
						
							|  |  |  |     """Exception raised for all parse errors.""" | 
					
						
							|  |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # SGML parser base class -- find tags and call handler functions. | 
					
						
							|  |  |  | # Usage: p = SGMLParser(); p.feed(data); ...; p.close(). | 
					
						
							|  |  |  | # The dtd is defined by deriving a class which defines methods | 
					
						
							|  |  |  | # with special names to handle tags: start_foo and end_foo to handle | 
					
						
							|  |  |  | # <foo> and </foo>, respectively, or do_foo to handle <foo> by itself. | 
					
						
							|  |  |  | # (Tags are converted to lower case for this purpose.)  The data | 
					
						
							|  |  |  | # between tags is passed to the parser by calling self.handle_data() | 
					
						
							| 
									
										
										
										
											2000-06-28 14:48:01 +00:00
										 |  |  | # with some data as argument (the data may be split up in arbitrary | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | # chunks).  Entity references are passed by calling | 
					
						
							|  |  |  | # self.handle_entityref() with the entity reference as argument. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  | class SGMLParser(markupbase.ParserBase): | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |     # Definition of entities -- derived classes may override | 
					
						
							|  |  |  |     entity_or_charref = re.compile('&(?:' | 
					
						
							|  |  |  |       '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' | 
					
						
							|  |  |  |       ')(;?)') | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     def __init__(self, verbose=0): | 
					
						
							| 
									
										
										
										
											2001-07-19 20:08:04 +00:00
										 |  |  |         """Initialize and reset this instance.""" | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.verbose = verbose | 
					
						
							|  |  |  |         self.reset() | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def reset(self): | 
					
						
							| 
									
										
										
										
											2001-07-19 20:08:04 +00:00
										 |  |  |         """Reset this instance. Loses all unprocessed data.""" | 
					
						
							| 
									
										
										
										
											2003-09-20 10:58:38 +00:00
										 |  |  |         self.__starttag_text = None | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.rawdata = '' | 
					
						
							|  |  |  |         self.stack = [] | 
					
						
							|  |  |  |         self.lasttag = '???' | 
					
						
							|  |  |  |         self.nomoretags = 0 | 
					
						
							|  |  |  |         self.literal = 0 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |         markupbase.ParserBase.reset(self) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def setnomoretags(self): | 
					
						
							| 
									
										
										
										
											2001-07-19 20:57:23 +00:00
										 |  |  |         """Enter literal mode (CDATA) till EOF.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Intended for derived classes only. | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.nomoretags = self.literal = 1 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def setliteral(self, *args): | 
					
						
							| 
									
										
										
										
											2001-07-19 20:57:23 +00:00
										 |  |  |         """Enter literal mode (CDATA).
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Intended for derived classes only. | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.literal = 1 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def feed(self, data): | 
					
						
							| 
									
										
										
										
											2001-07-19 20:57:23 +00:00
										 |  |  |         """Feed some data to the parser.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Call this as often as you want, with as little or as much text | 
					
						
							|  |  |  |         as you want (may include '\n').  (This just saves the text, | 
					
						
							|  |  |  |         all the processing is done by goahead().) | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.rawdata = self.rawdata + data | 
					
						
							|  |  |  |         self.goahead(0) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def close(self): | 
					
						
							| 
									
										
										
										
											2001-07-19 20:08:04 +00:00
										 |  |  |         """Handle the remaining data.""" | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.goahead(1) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |     def error(self, message): | 
					
						
							|  |  |  |         raise SGMLParseError(message) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     # Internal -- handle data as far as reasonable.  May leave state | 
					
						
							|  |  |  |     # and data to be processed by a subsequent call.  If 'end' is | 
					
						
							|  |  |  |     # true, force handling all data as if followed by EOF marker. | 
					
						
							|  |  |  |     def goahead(self, end): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         i = 0 | 
					
						
							|  |  |  |         n = len(rawdata) | 
					
						
							|  |  |  |         while i < n: | 
					
						
							|  |  |  |             if self.nomoretags: | 
					
						
							|  |  |  |                 self.handle_data(rawdata[i:n]) | 
					
						
							|  |  |  |                 i = n | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             match = interesting.search(rawdata, i) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |             if match: j = match.start() | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             else: j = n | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |             if i < j: | 
					
						
							|  |  |  |                 self.handle_data(rawdata[i:j]) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             i = j | 
					
						
							|  |  |  |             if i == n: break | 
					
						
							|  |  |  |             if rawdata[i] == '<': | 
					
						
							|  |  |  |                 if starttagopen.match(rawdata, i): | 
					
						
							|  |  |  |                     if self.literal: | 
					
						
							|  |  |  |                         self.handle_data(rawdata[i]) | 
					
						
							|  |  |  |                         i = i+1 | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  |                     k = self.parse_starttag(i) | 
					
						
							|  |  |  |                     if k < 0: break | 
					
						
							|  |  |  |                     i = k | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                 if rawdata.startswith("</", i): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                     k = self.parse_endtag(i) | 
					
						
							|  |  |  |                     if k < 0: break | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                     i = k | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                     self.literal = 0 | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                 if self.literal: | 
					
						
							|  |  |  |                     if n > (i + 1): | 
					
						
							|  |  |  |                         self.handle_data("<") | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                         i = i+1 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                     else: | 
					
						
							|  |  |  |                         # incomplete | 
					
						
							|  |  |  |                         break | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 if rawdata.startswith("<!--", i): | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  |                         # Strictly speaking, a comment is --.*-- | 
					
						
							|  |  |  |                         # within a declaration tag <!...>. | 
					
						
							|  |  |  |                         # This should be removed, | 
					
						
							|  |  |  |                         # and comments handled only in parse_declaration. | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                     k = self.parse_comment(i) | 
					
						
							|  |  |  |                     if k < 0: break | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                     i = k | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                 if rawdata.startswith("<?", i): | 
					
						
							| 
									
										
										
										
											1998-05-28 22:48:53 +00:00
										 |  |  |                     k = self.parse_pi(i) | 
					
						
							|  |  |  |                     if k < 0: break | 
					
						
							|  |  |  |                     i = i+k | 
					
						
							| 
									
										
										
										
											2001-01-15 01:36:40 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                 if rawdata.startswith("<!", i): | 
					
						
							| 
									
										
										
										
											2001-03-16 20:04:57 +00:00
										 |  |  |                     # This is some sort of declaration; in "HTML as | 
					
						
							|  |  |  |                     # deployed," this should only be the document type | 
					
						
							|  |  |  |                     # declaration ("<!DOCTYPE html...>"). | 
					
						
							|  |  |  |                     k = self.parse_declaration(i) | 
					
						
							|  |  |  |                     if k < 0: break | 
					
						
							|  |  |  |                     i = k | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                     continue | 
					
						
							|  |  |  |             elif rawdata[i] == '&': | 
					
						
							| 
									
										
										
										
											2001-07-16 18:30:35 +00:00
										 |  |  |                 if self.literal: | 
					
						
							|  |  |  |                     self.handle_data(rawdata[i]) | 
					
						
							|  |  |  |                     i = i+1 | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                 match = charref.match(rawdata, i) | 
					
						
							|  |  |  |                 if match: | 
					
						
							|  |  |  |                     name = match.group(1) | 
					
						
							|  |  |  |                     self.handle_charref(name) | 
					
						
							|  |  |  |                     i = match.end(0) | 
					
						
							|  |  |  |                     if rawdata[i-1] != ';': i = i-1 | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 match = entityref.match(rawdata, i) | 
					
						
							|  |  |  |                 if match: | 
					
						
							|  |  |  |                     name = match.group(1) | 
					
						
							|  |  |  |                     self.handle_entityref(name) | 
					
						
							|  |  |  |                     i = match.end(0) | 
					
						
							|  |  |  |                     if rawdata[i-1] != ';': i = i-1 | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                 self.error('neither < nor & ??') | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             # We get here only if incomplete matches but | 
					
						
							|  |  |  |             # nothing else | 
					
						
							|  |  |  |             match = incomplete.match(rawdata, i) | 
					
						
							|  |  |  |             if not match: | 
					
						
							|  |  |  |                 self.handle_data(rawdata[i]) | 
					
						
							|  |  |  |                 i = i+1 | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             j = match.end(0) | 
					
						
							|  |  |  |             if j == n: | 
					
						
							|  |  |  |                 break # Really incomplete | 
					
						
							|  |  |  |             self.handle_data(rawdata[i:j]) | 
					
						
							|  |  |  |             i = j | 
					
						
							|  |  |  |         # end while | 
					
						
							|  |  |  |         if end and i < n: | 
					
						
							|  |  |  |             self.handle_data(rawdata[i:n]) | 
					
						
							|  |  |  |             i = n | 
					
						
							|  |  |  |         self.rawdata = rawdata[i:] | 
					
						
							|  |  |  |         # XXX if end: check for empty stack | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |     # Extensions for the DOCTYPE scanner: | 
					
						
							|  |  |  |     _decl_otherchars = '=' | 
					
						
							| 
									
										
										
										
											2001-03-16 20:04:57 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-05-28 22:48:53 +00:00
										 |  |  |     # Internal -- parse processing instr, return length or -1 if not terminated | 
					
						
							|  |  |  |     def parse_pi(self, i): | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							| 
									
										
										
										
											2000-12-12 23:20:45 +00:00
										 |  |  |         if rawdata[i:i+2] != '<?': | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |             self.error('unexpected call to parse_pi()') | 
					
						
							| 
									
										
										
										
											1998-05-28 22:48:53 +00:00
										 |  |  |         match = piclose.search(rawdata, i+2) | 
					
						
							|  |  |  |         if not match: | 
					
						
							|  |  |  |             return -1 | 
					
						
							|  |  |  |         j = match.start(0) | 
					
						
							|  |  |  |         self.handle_pi(rawdata[i+2: j]) | 
					
						
							|  |  |  |         j = match.end(0) | 
					
						
							|  |  |  |         return j-i | 
					
						
							| 
									
										
										
										
											2000-06-29 18:50:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def get_starttag_text(self): | 
					
						
							|  |  |  |         return self.__starttag_text | 
					
						
							| 
									
										
										
										
											2001-01-15 01:36:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
    # Internal -- handle starttag, return the index just past the tag or -1 if not terminated
					
						
							|  |  |  |     def parse_starttag(self, i): | 
					
						
							| 
									
										
										
										
											2000-06-29 18:50:59 +00:00
										 |  |  |         self.__starttag_text = None | 
					
						
							|  |  |  |         start_pos = i | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         if shorttagopen.match(rawdata, i): | 
					
						
							|  |  |  |             # SGML shorthand: <tag/data/ == <tag>data</tag> | 
					
						
							|  |  |  |             # XXX Can data contain &... (entity or char refs)? | 
					
						
							|  |  |  |             # XXX Can data contain < or > (tag characters)? | 
					
						
							|  |  |  |             # XXX Can there be whitespace before the first /? | 
					
						
							|  |  |  |             match = shorttag.match(rawdata, i) | 
					
						
							|  |  |  |             if not match: | 
					
						
							|  |  |  |                 return -1 | 
					
						
							|  |  |  |             tag, data = match.group(1, 2) | 
					
						
							| 
									
										
										
										
											2000-06-29 18:50:59 +00:00
										 |  |  |             self.__starttag_text = '<%s/' % tag | 
					
						
							| 
									
										
										
										
											2001-02-09 07:49:30 +00:00
										 |  |  |             tag = tag.lower() | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             k = match.end(0) | 
					
						
							| 
									
										
										
										
											2000-06-29 18:50:59 +00:00
										 |  |  |             self.finish_shorttag(tag, data) | 
					
						
							|  |  |  |             self.__starttag_text = rawdata[start_pos:match.end(1) + 1] | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             return k | 
					
						
							| 
									
										
										
										
											2006-06-29 00:51:53 +00:00
										 |  |  |         match = starttag.match(rawdata, i) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         if not match: | 
					
						
							|  |  |  |             return -1 | 
					
						
							| 
									
										
										
										
											2006-06-29 00:51:53 +00:00
										 |  |  |         j = match.end(0) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         # Now parse the data between i+1 and j into a tag and attrs | 
					
						
							|  |  |  |         attrs = [] | 
					
						
							|  |  |  |         if rawdata[i:i+2] == '<>': | 
					
						
							|  |  |  |             # SGML shorthand: <> == <last open tag seen> | 
					
						
							|  |  |  |             k = j | 
					
						
							|  |  |  |             tag = self.lasttag | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             match = tagfind.match(rawdata, i+1) | 
					
						
							|  |  |  |             if not match: | 
					
						
							| 
									
										
										
										
											2001-09-24 20:15:51 +00:00
										 |  |  |                 self.error('unexpected call to parse_starttag') | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             k = match.end(0) | 
					
						
							| 
									
										
										
										
											2001-02-09 07:49:30 +00:00
										 |  |  |             tag = rawdata[i+1:k].lower() | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             self.lasttag = tag | 
					
						
							|  |  |  |         while k < j: | 
					
						
							|  |  |  |             match = attrfind.match(rawdata, k) | 
					
						
							|  |  |  |             if not match: break | 
					
						
							|  |  |  |             attrname, rest, attrvalue = match.group(1, 2, 3) | 
					
						
							|  |  |  |             if not rest: | 
					
						
							|  |  |  |                 attrvalue = attrname | 
					
						
							| 
									
										
										
										
											2006-04-01 08:35:18 +00:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											2006-04-03 02:46:44 +00:00
										 |  |  |                 if (attrvalue[:1] == "'" == attrvalue[-1:] or | 
					
						
							| 
									
										
										
										
											2006-04-01 08:35:18 +00:00
										 |  |  |                     attrvalue[:1] == '"' == attrvalue[-1:]): | 
					
						
							|  |  |  |                     # strip quotes | 
					
						
							|  |  |  |                     attrvalue = attrvalue[1:-1] | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |                 attrvalue = self.entity_or_charref.sub( | 
					
						
							|  |  |  |                     self._convert_ref, attrvalue) | 
					
						
							| 
									
										
										
										
											2001-02-09 07:49:30 +00:00
										 |  |  |             attrs.append((attrname.lower(), attrvalue)) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             k = match.end(0) | 
					
						
							|  |  |  |         if rawdata[j] == '>': | 
					
						
							|  |  |  |             j = j+1 | 
					
						
							| 
									
										
										
										
											2000-06-29 18:50:59 +00:00
										 |  |  |         self.__starttag_text = rawdata[start_pos:j] | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.finish_starttag(tag, attrs) | 
					
						
							|  |  |  |         return j | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |     # Internal -- convert entity or character reference | 
					
						
							|  |  |  |     def _convert_ref(self, match): | 
					
						
							|  |  |  |         if match.group(2): | 
					
						
							|  |  |  |             return self.convert_charref(match.group(2)) or \ | 
					
						
							|  |  |  |                 '&#%s%s' % match.groups()[1:] | 
					
						
							|  |  |  |         elif match.group(3): | 
					
						
							|  |  |  |             return self.convert_entityref(match.group(1)) or \ | 
					
						
							|  |  |  |                 '&%s;' % match.group(1) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             return '&%s' % match.group(1) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     # Internal -- parse endtag | 
					
						
							|  |  |  |     def parse_endtag(self, i): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         rawdata = self.rawdata | 
					
						
							| 
									
										
										
										
											2006-06-29 00:51:53 +00:00
										 |  |  |         match = endtag.match(rawdata, i) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         if not match: | 
					
						
							|  |  |  |             return -1 | 
					
						
							| 
									
										
										
										
											2006-06-29 00:51:53 +00:00
										 |  |  |         j = match.end(0) | 
					
						
							| 
									
										
										
										
											2001-02-09 07:49:30 +00:00
										 |  |  |         tag = rawdata[i+2:j].strip().lower() | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         if rawdata[j] == '>': | 
					
						
							|  |  |  |             j = j+1 | 
					
						
							|  |  |  |         self.finish_endtag(tag) | 
					
						
							|  |  |  |         return j | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>) | 
					
						
							|  |  |  |     def finish_shorttag(self, tag, data): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.finish_starttag(tag, []) | 
					
						
							|  |  |  |         self.handle_data(data) | 
					
						
							|  |  |  |         self.finish_endtag(tag) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- finish processing of start tag | 
					
						
							|  |  |  |     # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag | 
					
						
							|  |  |  |     def finish_starttag(self, tag, attrs): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             method = getattr(self, 'start_' + tag) | 
					
						
							|  |  |  |         except AttributeError: | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 method = getattr(self, 'do_' + tag) | 
					
						
							|  |  |  |             except AttributeError: | 
					
						
							|  |  |  |                 self.unknown_starttag(tag, attrs) | 
					
						
							|  |  |  |                 return -1 | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 self.handle_starttag(tag, method, attrs) | 
					
						
							|  |  |  |                 return 0 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.stack.append(tag) | 
					
						
							|  |  |  |             self.handle_starttag(tag, method, attrs) | 
					
						
							|  |  |  |             return 1 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- finish processing of end tag | 
					
						
							|  |  |  |     def finish_endtag(self, tag): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         if not tag: | 
					
						
							|  |  |  |             found = len(self.stack) - 1 | 
					
						
							|  |  |  |             if found < 0: | 
					
						
							|  |  |  |                 self.unknown_endtag(tag) | 
					
						
							|  |  |  |                 return | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             if tag not in self.stack: | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     method = getattr(self, 'end_' + tag) | 
					
						
							|  |  |  |                 except AttributeError: | 
					
						
							|  |  |  |                     self.unknown_endtag(tag) | 
					
						
							| 
									
										
										
										
											1998-07-07 22:46:11 +00:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     self.report_unbalanced(tag) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |                 return | 
					
						
							|  |  |  |             found = len(self.stack) | 
					
						
							|  |  |  |             for i in range(found): | 
					
						
							|  |  |  |                 if self.stack[i] == tag: found = i | 
					
						
							|  |  |  |         while len(self.stack) > found: | 
					
						
							|  |  |  |             tag = self.stack[-1] | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 method = getattr(self, 'end_' + tag) | 
					
						
							|  |  |  |             except AttributeError: | 
					
						
							|  |  |  |                 method = None | 
					
						
							|  |  |  |             if method: | 
					
						
							|  |  |  |                 self.handle_endtag(tag, method) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 self.unknown_endtag(tag) | 
					
						
							|  |  |  |             del self.stack[-1] | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Overridable -- handle start tag | 
					
						
							|  |  |  |     def handle_starttag(self, tag, method, attrs): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         method(attrs) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Overridable -- handle end tag | 
					
						
							|  |  |  |     def handle_endtag(self, tag, method): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         method() | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Example -- report an unbalanced </...> tag. | 
					
						
							|  |  |  |     def report_unbalanced(self, tag): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         if self.verbose: | 
					
						
							|  |  |  |             print '*** Unbalanced </' + tag + '>' | 
					
						
							|  |  |  |             print '*** Stack:', self.stack | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |     def convert_charref(self, name): | 
					
						
							|  |  |  |         """Convert character reference, may be overridden.""" | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2001-02-09 07:49:30 +00:00
										 |  |  |             n = int(name) | 
					
						
							| 
									
										
										
										
											2001-02-09 10:12:19 +00:00
										 |  |  |         except ValueError: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             return | 
					
						
							|  |  |  |         if not 0 <= n <= 255: | 
					
						
							|  |  |  |             return | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |         return self.convert_codepoint(n) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def convert_codepoint(self, codepoint): | 
					
						
							|  |  |  |         return chr(codepoint) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def handle_charref(self, name): | 
					
						
							|  |  |  |         """Handle character reference, no need to override.""" | 
					
						
							| 
									
										
										
										
											2006-06-23 06:03:45 +00:00
										 |  |  |         replacement = self.convert_charref(name) | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |         if replacement is None: | 
					
						
							|  |  |  |             self.unknown_charref(name) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2006-06-23 06:03:45 +00:00
										 |  |  |             self.handle_data(replacement) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Definition of entities -- derived classes may override | 
					
						
							|  |  |  |     entitydefs = \ | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |     def convert_entityref(self, name): | 
					
						
							|  |  |  |         """Convert entity references.
 | 
					
						
							| 
									
										
										
										
											2001-07-19 20:08:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |         As an alternative to overriding this method; one can tailor the | 
					
						
							|  |  |  |         results by setting up the self.entitydefs mapping appropriately. | 
					
						
							| 
									
										
										
										
											2001-07-19 20:08:04 +00:00
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         table = self.entitydefs | 
					
						
							| 
									
										
										
										
											2002-06-01 14:18:47 +00:00
										 |  |  |         if name in table: | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |             return table[name] | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         else: | 
					
						
							|  |  |  |             return | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |     def handle_entityref(self, name): | 
					
						
							|  |  |  |         """Handle entity references, no need to override.""" | 
					
						
							| 
									
										
										
										
											2006-06-17 01:07:54 +00:00
										 |  |  |         replacement = self.convert_entityref(name) | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  |         if replacement is None: | 
					
						
							|  |  |  |             self.unknown_entityref(name) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2006-06-17 01:07:54 +00:00
										 |  |  |             self.handle_data(self.convert_entityref(name)) | 
					
						
							| 
									
										
										
										
											2006-06-16 23:45:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     # Example -- handle data, should be overridden | 
					
						
							|  |  |  |     def handle_data(self, data): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         pass | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Example -- handle comment, could be overridden | 
					
						
							|  |  |  |     def handle_comment(self, data): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         pass | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-03-16 20:04:57 +00:00
										 |  |  |     # Example -- handle declaration, could be overridden | 
					
						
							|  |  |  |     def handle_decl(self, decl): | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-05-28 22:48:53 +00:00
										 |  |  |     # Example -- handle processing instruction, could be overridden | 
					
						
							|  |  |  |     def handle_pi(self, data): | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     # To be overridden -- handlers for unknown objects | 
					
						
							|  |  |  |     def unknown_starttag(self, tag, attrs): pass | 
					
						
							|  |  |  |     def unknown_endtag(self, tag): pass | 
					
						
							|  |  |  |     def unknown_charref(self, ref): pass | 
					
						
							|  |  |  |     def unknown_entityref(self, ref): pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class TestSGMLParser(SGMLParser): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, verbose=0): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.testdata = "" | 
					
						
							|  |  |  |         SGMLParser.__init__(self, verbose) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def handle_data(self, data): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.testdata = self.testdata + data | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |         if len(repr(self.testdata)) >= 70: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |             self.flush() | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def flush(self): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         data = self.testdata | 
					
						
							|  |  |  |         if data: | 
					
						
							|  |  |  |             self.testdata = "" | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |             print 'data:', repr(data) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def handle_comment(self, data): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.flush() | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |         r = repr(data) | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         if len(r) > 68: | 
					
						
							|  |  |  |             r = r[:32] + '...' + r[-32:] | 
					
						
							|  |  |  |         print 'comment:', r | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def unknown_starttag(self, tag, attrs): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.flush() | 
					
						
							|  |  |  |         if not attrs: | 
					
						
							|  |  |  |             print 'start tag: <' + tag + '>' | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             print 'start tag: <' + tag, | 
					
						
							|  |  |  |             for name, value in attrs: | 
					
						
							|  |  |  |                 print name + '=' + '"' + value + '"', | 
					
						
							|  |  |  |             print '>' | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def unknown_endtag(self, tag): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.flush() | 
					
						
							|  |  |  |         print 'end tag: </' + tag + '>' | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def unknown_entityref(self, ref): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.flush() | 
					
						
							|  |  |  |         print '*** unknown entity ref: &' + ref + ';' | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def unknown_charref(self, ref): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         self.flush() | 
					
						
							|  |  |  |         print '*** unknown char ref: &#' + ref + ';' | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |     def unknown_decl(self, data): | 
					
						
							|  |  |  |         self.flush() | 
					
						
							|  |  |  |         print '*** unknown decl: [' + data + ']' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     def close(self): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         SGMLParser.close(self) | 
					
						
							|  |  |  |         self.flush() | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test(args = None): | 
					
						
							|  |  |  |     import sys | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-06-02 00:40:05 +00:00
										 |  |  |     if args is None: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         args = sys.argv[1:] | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if args and args[0] == '-s': | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         args = args[1:] | 
					
						
							|  |  |  |         klass = SGMLParser | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         klass = TestSGMLParser | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if args: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         file = args[0] | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         file = 'test.html' | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if file == '-': | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         f = sys.stdin | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             f = open(file, 'r') | 
					
						
							|  |  |  |         except IOError, msg: | 
					
						
							|  |  |  |             print file, ":", msg | 
					
						
							|  |  |  |             sys.exit(1) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     data = f.read() | 
					
						
							|  |  |  |     if f is not sys.stdin: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         f.close() | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     x = klass() | 
					
						
							|  |  |  |     for c in data: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         x.feed(c) | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     x.close() | 
					
						
							| 
									
										
										
										
											1995-02-27 13:16:55 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1995-03-04 22:28:49 +00:00
										 |  |  | if __name__ == '__main__': | 
					
						
							| 
									
										
										
										
											1996-03-28 18:45:04 +00:00
										 |  |  |     test() |