mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	Added back the InputSource class (patch 101630).
This commit is contained in:
		
							parent
							
								
									b7536d5860
								
							
						
					
					
						commit
						523b0a6ec8
					
				
					 4 changed files with 138 additions and 31 deletions
				
			
		|  | @ -21,16 +21,17 @@ | |||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from xmlreader import InputSource | ||||
| from handler import ContentHandler, ErrorHandler | ||||
| from _exceptions import SAXException, SAXNotRecognizedException, \ | ||||
|                         SAXParseException, SAXNotSupportedException | ||||
| 
 | ||||
| 
 | ||||
| def parse(filename_or_stream, handler, errorHandler=ErrorHandler()): | ||||
| def parse(source, handler, errorHandler=ErrorHandler()): | ||||
|     parser = ExpatParser() | ||||
|     parser.setContentHandler(handler) | ||||
|     parser.setErrorHandler(errorHandler) | ||||
|     parser.parse(filename_or_stream) | ||||
|     parser.parse(source) | ||||
| 
 | ||||
| def parseString(string, handler, errorHandler=ErrorHandler()): | ||||
|     try: | ||||
|  | @ -43,7 +44,10 @@ def parseString(string, handler, errorHandler=ErrorHandler()): | |||
|     parser = ExpatParser() | ||||
|     parser.setContentHandler(handler) | ||||
|     parser.setErrorHandler(errorHandler) | ||||
|     parser.parse(StringIO(string)) | ||||
| 
 | ||||
|     inpsrc = InputSource() | ||||
|     inpsrc.setByteStream(StringIO(string)) | ||||
|     parser.parse(inpsrc) | ||||
| 
 | ||||
| # this is the parser list used by the make_parser function if no | ||||
| # alternatives are given as parameters to the function | ||||
|  |  | |||
|  | @ -18,7 +18,7 @@ | |||
| 
 | ||||
| from xml.sax._exceptions import * | ||||
| from xml.parsers import expat | ||||
| from xml.sax import xmlreader | ||||
| from xml.sax import xmlreader, saxutils | ||||
| 
 | ||||
| AttributesImpl = xmlreader.AttributesImpl | ||||
| AttributesNSImpl = xmlreader.AttributesNSImpl | ||||
|  | @ -37,28 +37,24 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20): | |||
| 
 | ||||
|     # XMLReader methods | ||||
| 
 | ||||
|     def parse(self, stream_or_string): | ||||
|     def parse(self, source): | ||||
|         "Parse an XML document from a URL." | ||||
|         if type(stream_or_string) is type(""): | ||||
|             stream = open(stream_or_string) | ||||
|         else: | ||||
|             stream = stream_or_string | ||||
|         source = saxutils.prepare_input_source(source) | ||||
| 
 | ||||
|         self._source = source | ||||
|         self.reset() | ||||
|         self._cont_handler.setDocumentLocator(self) | ||||
|         try: | ||||
|             xmlreader.IncrementalParser.parse(self, stream) | ||||
|             xmlreader.IncrementalParser.parse(self, source) | ||||
|         except expat.error: | ||||
|             error_code = self._parser.ErrorCode | ||||
|             raise SAXParseException(expat.ErrorString(error_code), None, self) | ||||
|              | ||||
|             self._cont_handler.endDocument() | ||||
| 
 | ||||
|     def prepareParser(self, filename=None): | ||||
|         self._source = filename | ||||
|          | ||||
|         if self._source != None: | ||||
|             self._parser.SetBase(self._source) | ||||
|     def prepareParser(self, source): | ||||
|         if source.getSystemId() != None: | ||||
|             self._parser.SetBase(source.getSystemId()) | ||||
|          | ||||
|     def getFeature(self, name): | ||||
|         if name == feature_namespaces: | ||||
|  |  | |||
|  | @ -3,6 +3,7 @@ | |||
| convenience of application and driver writers. | ||||
| """ | ||||
| 
 | ||||
| import os, urlparse, urllib | ||||
| import handler | ||||
| import xmlreader | ||||
| 
 | ||||
|  | @ -181,3 +182,24 @@ def getProperty(self, name): | |||
| 
 | ||||
|     def setProperty(self, name, value): | ||||
|         self._parent.setProperty(name, value) | ||||
| 
 | ||||
| # --- Utility functions | ||||
| 
 | ||||
| def prepare_input_source(source, base = ""): | ||||
|     """This function takes an InputSource and an optional base URL and | ||||
|     returns a fully resolved InputSource object ready for reading.""" | ||||
|      | ||||
|     if type(source) == type(""): | ||||
|         source = xmlreader.InputSource(source) | ||||
| 
 | ||||
|     if source.getByteStream() == None: | ||||
|         sysid = source.getSystemId() | ||||
|         if urlparse.urlparse(sysid)[0] == '': | ||||
|             basehead = os.path.split(os.path.normpath(base))[0] | ||||
|             source.setSystemId(os.path.join(basehead, sysid)) | ||||
|         else: | ||||
|             source.setSystemId(urlparse.urljoin(base, sysid)) | ||||
|              | ||||
|         source.setByteStream(urllib.urlopen(source.getSystemId())) | ||||
|          | ||||
|     return source | ||||
|  |  | |||
|  | @ -6,6 +6,7 @@ | |||
| # ===== XMLREADER ===== | ||||
| 
 | ||||
| class XMLReader: | ||||
|      | ||||
|     def __init__(self): | ||||
|         self._cont_handler = handler.ContentHandler() | ||||
|         #self._dtd_handler = handler.DTDHandler() | ||||
|  | @ -73,6 +74,7 @@ def setProperty(self, name, value): | |||
|         "Sets the value of a SAX2 property." | ||||
|         raise SAXNotRecognizedException("Property '%s' not recognized" % name) | ||||
| 
 | ||||
| import saxutils | ||||
|      | ||||
| class IncrementalParser(XMLReader): | ||||
|     """This interface adds three extra methods to the XMLReader | ||||
|  | @ -98,23 +100,17 @@ def __init__(self, bufsize=2**16): | |||
|         self._bufsize = bufsize | ||||
|         XMLReader.__init__(self) | ||||
| 
 | ||||
|     def _parseOpenFile(self, source): | ||||
|         buffer = source.read(self._bufsize) | ||||
|     def parse(self, source): | ||||
|         source = saxutils.prepare_input_source(source) | ||||
|              | ||||
|         self.prepareParser(source) | ||||
|         file = source.getByteStream() | ||||
|         buffer = file.read(self._bufsize) | ||||
|         while buffer != "": | ||||
|             self.feed(buffer) | ||||
|             buffer = source.read(self._bufsize) | ||||
|         self.close() | ||||
|         self.reset() | ||||
|             buffer = file.read(self._bufsize) | ||||
|              | ||||
|     def parse(self, source): | ||||
|         if hasattr(source, "read"): | ||||
|             self._parseOpenFile(source) | ||||
|         else: | ||||
|             #FIXME: how to recognize if it is a URL instead of filename? | ||||
|             self.prepareParser(source) | ||||
|             file = open(source) | ||||
|             self._parseOpenFile(file) | ||||
|             file.close() | ||||
|         self.reset() | ||||
| 
 | ||||
|     def feed(self, data):         | ||||
|         """This method gives the raw XML data in the data parameter to | ||||
|  | @ -174,6 +170,95 @@ def getSystemId(self): | |||
|         "Return the system identifier for the current event." | ||||
|         return None | ||||
| 
 | ||||
| # ===== INPUTSOURCE ===== | ||||
| 
 | ||||
| class InputSource: | ||||
|     """Encapsulation of the information needed by the XMLReader to | ||||
|     read entities. | ||||
| 
 | ||||
|     This class may include information about the public identifier, | ||||
|     system identifier, byte stream (possibly with character encoding | ||||
|     information) and/or the character stream of an entity. | ||||
| 
 | ||||
|     Applications will create objects of this class for use in the | ||||
|     XMLReader.parse method and for returning from | ||||
|     EntityResolver.resolveEntity. | ||||
| 
 | ||||
|     An InputSource belongs to the application, the XMLReader is not | ||||
|     allowed to modify InputSource objects passed to it from the | ||||
|     application, although it may make copies and modify those.""" | ||||
| 
 | ||||
|     def __init__(self, system_id = None): | ||||
|         self.__system_id = system_id | ||||
|         self.__public_id = None | ||||
|         self.__encoding  = None | ||||
|         self.__bytefile  = None | ||||
|         self.__charfile  = None | ||||
| 
 | ||||
|     def setPublicId(self, public_id): | ||||
|         "Sets the public identifier of this InputSource." | ||||
|         self.__public_id = public_id | ||||
| 
 | ||||
|     def getPublicId(self): | ||||
|         "Returns the public identifier of this InputSource." | ||||
|         return self.__public_id | ||||
| 
 | ||||
|     def setSystemId(self, system_id): | ||||
|         "Sets the system identifier of this InputSource." | ||||
|         self.__system_id = system_id | ||||
| 
 | ||||
|     def getSystemId(self): | ||||
|         "Returns the system identifier of this InputSource." | ||||
|         return self.__system_id | ||||
| 
 | ||||
|     def setEncoding(self, encoding): | ||||
|         """Sets the character encoding of this InputSource. | ||||
| 
 | ||||
|         The encoding must be a string acceptable for an XML encoding | ||||
|         declaration (see section 4.3.3 of the XML recommendation). | ||||
| 
 | ||||
|         The encoding attribute of the InputSource is ignored if the | ||||
|         InputSource also contains a character stream.""" | ||||
|         self.__encoding = encoding | ||||
| 
 | ||||
|     def getEncoding(self): | ||||
|         "Get the character encoding of this InputSource." | ||||
|         return self.__encoding | ||||
| 
 | ||||
|     def setByteStream(self, bytefile): | ||||
|         """Set the byte stream (a Python file-like object which does | ||||
|         not perform byte-to-character conversion) for this input | ||||
|         source. | ||||
|          | ||||
|         The SAX parser will ignore this if there is also a character | ||||
|         stream specified, but it will use a byte stream in preference | ||||
|         to opening a URI connection itself. | ||||
| 
 | ||||
|         If the application knows the character encoding of the byte | ||||
|         stream, it should set it with the setEncoding method.""" | ||||
|         self.__bytefile = bytefile | ||||
| 
 | ||||
|     def getByteStream(self): | ||||
|         """Get the byte stream for this input source. | ||||
|          | ||||
|         The getEncoding method will return the character encoding for | ||||
|         this byte stream, or None if unknown."""         | ||||
|         return self.__bytefile | ||||
|          | ||||
|     def setCharacterStream(self, charfile): | ||||
|         """Set the character stream for this input source. (The stream | ||||
|         must be a Python 1.6 Unicode-wrapped file-like that performs | ||||
|         conversion to Unicode strings.) | ||||
|          | ||||
|         If there is a character stream specified, the SAX parser will | ||||
|         ignore any byte stream and will not attempt to open a URI | ||||
|         connection to the system identifier.""" | ||||
|         self.__charfile = charfile | ||||
| 
 | ||||
|     def getCharacterStream(self): | ||||
|         "Get the character stream for this input source." | ||||
|         return self.__charfile | ||||
|      | ||||
| # ===== ATTRIBUTESIMPL ===== | ||||
| 
 | ||||
| class AttributesImpl: | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Lars Gustäbel
						Lars Gustäbel