mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 03:04:41 +00:00 
			
		
		
		
	 07cbc4e5bd
			
		
	
	
		07cbc4e5bd
		
	
	
	
	
		
			
			Add support for parsing already-opened files. Make sure the parse() method closes exactly those files that it opens. Modified by FLD for better conformance to the Python style guide. This closes SourceForge patch #101512.
		
			
				
	
	
		
			235 lines
		
	
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			235 lines
		
	
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
 | |
| should be based on this code. """
 | |
| 
 | |
import handler

from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
 | |
| 
 | |
| # ===== XMLREADER =====
 | |
| 
 | |
class XMLReader:
    """Base class for SAX 2 XML readers (parsers).

    Stores the handler objects an application registers and provides
    default implementations of the feature/property/locale methods that
    simply report the request as unsupported.  Concrete parsers are
    expected to override parse().
    """

    def __init__(self):
        self._cont_handler = handler.ContentHandler()
        # No default DTD handler or entity resolver is installed.  The
        # attributes are still initialised so that getDTDHandler() and
        # getEntityResolver() return None instead of raising
        # AttributeError when called before the matching setter.
        # NOTE(review): defaults of handler.DTDHandler() and
        # handler.EntityResolver() were presumably intended -- confirm
        # that the handler module provides them before switching.
        self._dtd_handler = None
        self._ent_handler = None
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler, or None if none was registered."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver, or None if none was registered."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
 | |
| 
 | |
|  
 | |
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Remember the chunk size (bufsize) used by parse() when reading."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def _parseOpenFile(self, source):
        """Feed the contents of an already-open file-like object.

        Reads source in chunks of self._bufsize, passing each chunk to
        feed(), then calls close() followed by reset().  The object is
        not closed here; that is the responsibility of whoever opened it.
        """
        chunk = source.read(self._bufsize)
        # Test truthiness instead of comparing with "": an exhausted byte
        # stream returns an empty bytes object, which never compares equal
        # to "" and would otherwise keep this loop running forever.
        while chunk:
            self.feed(chunk)
            chunk = source.read(self._bufsize)
        self.close()
        self.reset()

    def parse(self, source):
        """Parse a document given as an open file-like object or a file name.

        Anything with a read attribute is treated as already open and is
        left open afterwards.  Otherwise prepareParser(source) is called,
        source is opened as a file name, and that file is closed again
        even if parsing raises.
        """
        if hasattr(source, "read"):
            self._parseOpenFile(source)
        else:
            #FIXME: how to recognize if it is a URL instead of filename?
            self.prepareParser(source)
            stream = open(source)
            try:
                self._parseOpenFile(stream)
            finally:
                # Close exactly the file opened here, also on errors.
                stream.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
 | |
| 
 | |
| # ===== LOCATOR =====
 | |
class Locator:
    """Associates a SAX event with a location in the document.

    According to the interface contract, the values are meaningful only
    while a DocumentHandler method is being called; results obtained at
    any other time are unpredictable.  This base class knows no
    location, so every accessor returns its "unknown" value.
    """

    def getColumnNumber(self):
        """Return the column where the current event ends (-1 here)."""
        return -1

    def getLineNumber(self):
        """Return the line where the current event ends (-1 here)."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event (None here)."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event (None here)."""
        return None
 | |
| 
 | |
| # --- AttributesImpl
 | |
class AttributesImpl:
    """Read-only, mapping-like view of an element's attributes.

    attrs maps attribute names to values; rawnames maps qualified (raw)
    names to the names used as keys in attrs.  Both mappings are stored
    by reference, not copied.
    """

    def __init__(self, attrs, rawnames):
        self._attrs = attrs
        self._rawnames = rawnames

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type; always 'CDATA', whatever the name."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value of the attribute with qualified name name."
        return self._attrs[self._rawnames[name]]

    def getNameByQName(self, name):
        "Return the attribute name registered for qualified name name."
        return self._rawnames[name]

    def getNames(self):
        "Return a list of the attribute names."
        # list() gives callers a snapshot list even where keys() is a view.
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._rawnames.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, "name in attrs" would fall back to probing
        # __getitem__ with integer indexes instead of checking the key.
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if an attribute called name exists."
        # Use the membership operator rather than the mapping's own
        # has_key method, which plain dicts no longer provide.
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        "Return a new instance of the same class over the same mappings."
        return self.__class__(self._attrs, self._rawnames)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
 | |
| 
 | |
| 
 | |
def _test():
    """Smoke test: instantiate each class defined in this module once."""
    XMLReader()
    IncrementalParser()
    Locator()
    # AttributesImpl requires both mappings; empty ones are enough here.
    AttributesImpl({}, {})


# Run the smoke test when the module is executed as a script.
if __name__ == "__main__":
    _test()
 |