mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	svn+ssh://pythondev@svn.python.org/python/trunk ........ r62021 | benjamin.peterson | 2008-03-28 18:11:01 -0500 (Fri, 28 Mar 2008) | 2 lines NIL => NULL ........ r62029 | amaury.forgeotdarc | 2008-03-28 20:42:31 -0500 (Fri, 28 Mar 2008) | 3 lines Correctly call the base class tearDown(); otherwise running test_logging twice produce the errors we see on all buildbots ........ r62035 | raymond.hettinger | 2008-03-29 05:42:07 -0500 (Sat, 29 Mar 2008) | 1 line Be explicit about what efficient means. ........ r62036 | georg.brandl | 2008-03-29 06:46:18 -0500 (Sat, 29 Mar 2008) | 2 lines Fix capitalization. ........ r62037 | amaury.forgeotdarc | 2008-03-29 07:42:54 -0500 (Sat, 29 Mar 2008) | 5 lines lib2to3 should install a logging handler only when run as a main program, not when used as a library. This may please the buildbots, which fail when test_lib2to3 is run before test_logging. ........ r62043 | benjamin.peterson | 2008-03-29 10:24:25 -0500 (Sat, 29 Mar 2008) | 3 lines #2503 make singletons compared with "is" not == or != Thanks to Wummel for the patch ........ r62044 | gerhard.haering | 2008-03-29 14:11:52 -0500 (Sat, 29 Mar 2008) | 2 lines Documented the lastrowid attribute. ........ r62052 | benjamin.peterson | 2008-03-30 14:35:10 -0500 (Sun, 30 Mar 2008) | 2 lines Updated README regarding doc formats ........ r62053 | georg.brandl | 2008-03-30 14:41:39 -0500 (Sun, 30 Mar 2008) | 2 lines The other download formats will be available for 2.6 too. ........
		
			
				
	
	
		
			414 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			414 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						|
SAX driver for the pyexpat C module.  This driver works with
 | 
						|
pyexpat.__version__ == '2.22'.
 | 
						|
"""
 | 
						|
 | 
						|
version = "0.20"
 | 
						|
 | 
						|
from xml.sax._exceptions import *
 | 
						|
from xml.sax.handler import feature_validation, feature_namespaces
 | 
						|
from xml.sax.handler import feature_namespace_prefixes
 | 
						|
from xml.sax.handler import feature_external_ges, feature_external_pes
 | 
						|
from xml.sax.handler import feature_string_interning
 | 
						|
from xml.sax.handler import property_xml_string, property_interning_dict
 | 
						|
 | 
						|
# Importing xml.parsers.expat does not fail with ImportError on Jython,
# so that platform is rejected explicitly before the import is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Refuse to load this driver when expat is missing or when the module
# that was imported does not provide a parser factory.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
 | 
						|
from xml.sax import xmlreader, saxutils, handler

# Module-level shortcuts for the two attribute container classes.
AttributesImpl, AttributesNSImpl = (xmlreader.AttributesImpl,
                                    xmlreader.AttributesNSImpl)
 | 
						|
 | 
						|
# Weak references let the locator point at the parser without creating a
# parser <-> content handler cycle.  When the interpreter has no weak
# reference support, _mkproxy simply hands back the object it was given.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    from weakref import proxy as _mkproxy
    del _weakref
 | 
						|
 | 
						|
# --- ExpatLocator
 | 
						|
 | 
						|
class ExpatLocator(xmlreader.Locator):
    """Locator handed to content handlers by ExpatParser.

    Only a proxy produced by _mkproxy is stored, so that the locator does
    not add a strong parser -> handler -> locator -> parser cycle.
    """

    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's current column, or None when no parser exists."""
        owner = self._ref
        if owner._parser is not None:
            return owner._parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's current line, or 1 when no parser exists."""
        owner = self._ref
        if owner._parser is not None:
            return owner._parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public identifier of the source being parsed."""
        owner = self._ref
        if owner is not None:
            return owner._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system identifier of the source being parsed."""
        owner = self._ref
        if owner is not None:
            return owner._source.getSystemId()
        return None
 | 
						|
 | 
						|
 | 
						|
# --- ExpatParser
 | 
						|
 | 
						|
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The object is both an incremental SAX reader (feed/close/reset) and a
    Locator reporting the position of the underlying expat parser.

    namespaceHandling -- initial value of the namespaces feature.
    bufsize -- passed through to xmlreader.IncrementalParser.

    NOTE(security): external general entities are enabled by default
    (self._external_ges = 1).  Callers parsing untrusted input should
    turn feature_external_ges off with setFeature() before parsing.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None             # expat parser, created by reset()
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []         # (parser, source) of outer entities
        self._external_ges = 1
        self._interning = None          # dict used for interning, or None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Tell expat the base URI of source, when it has a system id."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install handler; re-wire expat's callbacks if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of the SAX feature called name.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the SAX feature called name to state.

        Raises SAXNotSupportedException while a parse is in progress or
        when asked to enable a feature expat cannot provide, and
        SAXNotRecognizedException for unknown feature names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an already installed interning dict.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of the SAX property called name.

        The XML string property is only available while an expat parser
        exists; otherwise SAXNotSupportedException is raised.  Unknown
        names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            # Test for presence explicitly instead of relying on the
            # truth value of the expat parser object.
            if self._parser is not None:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the SAX property called name to value.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for unknown names.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Hand data to expat, starting a new document if necessary.

        An expat.error is wrapped in a SAXParseException and passed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the current document and release the expat parser.

        Does nothing while an external entity is being completed, and
        nothing when there is no parser (never started or already
        closed), so calling close() repeatedly is harmless.
        """
        if self._entity_stack or self._parser is None:
            # Completing an external entity, or nothing left to close.
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            # Leave the "parsing" state even when the final chunk or
            # endDocument() raised, so that the next feed() starts afresh.
            self._parsing = 0

    def _reset_cont_handler(self):
        # Point expat directly at the content handler's bound methods.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        # Wire (or unwire) expat's lexical callbacks to the lexical handler.
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Goes through start_doctype_decl to reorder the arguments.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and attach all callbacks to it."""
        if self._namespaces:
            # " " is the separator expat puts between URI, local name and
            # prefix; the *_ns handlers split names on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        # Same "is None" test as _reset_lex_handler_prop, so a handler
        # object that happens to be falsy is still hooked up.
        if self._lex_handler_prop is not None:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's current column, or None when no parser exists."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's current line, or 1 when no parser exists."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current input source."""
        return self._source.getSystemId()

    # event handlers

    @staticmethod
    def _split_ns_name(name):
        # Turn an expat element name into a (uri, localname) pair.
        parts = name.split()
        if len(parts) == 1:
            # no namespace
            return (None, name)
        if len(parts) == 3:
            # uri, localname, prefix: the prefix is dropped
            return parts[0], parts[1]
        # default namespace
        return tuple(parts)

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Report a namespace-aware start tag via startElementNS().

        Attribute names are converted to (uri, localname) keys and their
        qualified names are rebuilt; the element qname is passed as None.
        """
        pair = self._split_ns_name(name)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Report a namespace-aware end tag via endElementNS()."""
        self._cont_handler.endElementNS(self._split_ns_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # startDTD takes the public id before the system id.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a nested expat parser.

        Returns 1 on success (or when external general entities are
        switched off) and 0 when parsing the entity raised an exception.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Remember the outer parser/source while the entity is processed.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            # Only ordinary errors are turned into a failure status;
            # KeyboardInterrupt and SystemExit are allowed to propagate.
            # The entity's parser/source stay current and the stack entry
            # is not popped on this path; reset() clears both.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
 | 
						|
 | 
						|
# ---
 | 
						|
 | 
						|
def create_parser(*args, **kwargs):
    """Build a new ExpatParser, forwarding every argument to it."""
    parser = ExpatParser(*args, **kwargs)
    return parser
 | 
						|
 | 
						|
# ---
 | 
						|
 | 
						|
if __name__ == "__main__":
    # Manual smoke test: parse one document and echo it back as XML.
    import sys
    import xml.sax
    import xml.sax.saxutils

    # An optional first command-line argument names the document; without
    # it the historical hard-coded path is used.
    path = sys.argv[1] if len(sys.argv) > 1 else "../../../hamlet.xml"

    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils; the xml.sax package
    # itself does not export it.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse(path)
 |