mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	 2c8a89cc3f
			
		
	
	
		2c8a89cc3f
		
	
	
	
	
		
			
			correct order of constructor args in createAttributeNS
pulldom: use symbolic names for uri and localnames
         correct usage of createAttribute and setAttributeNode signatures.
		
	
			
		
			
				
	
	
		
			463 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			463 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """\
 | |
| minidom.py -- a lightweight DOM implementation based on SAX.
 | |
| 
 | |
| parse( "foo.xml" )
 | |
| 
 | |
| parseString( "<foo><bar/></foo>" )
 | |
| 
 | |
| Todo:
 | |
| =====
 | |
|  * convenience methods for getting elements and text.
 | |
|  * more testing
 | |
|  * bring some of the writer and linearizer code into conformance with this
 | |
|         interface
 | |
|  * SAX 2 namespaces
 | |
| """
 | |
| 
 | |
| import pulldom
 | |
| import string
 | |
| from StringIO import StringIO
 | |
| import types
 | |
| 
 | |
class Node:
    """Base class shared by every node type in this DOM implementation.

    Holds the DOM node-type constants, the ``childNodes`` list and the
    generic tree-manipulation methods.  Attribute access is extended by
    ``__getattr__`` so that ``node.foo`` falls back to ``node._get_foo()``
    and ``node._get_foo`` falls back to a getter for ``node.foo``.
    """
    ELEMENT_NODE                = 1
    ATTRIBUTE_NODE              = 2
    TEXT_NODE                   = 3
    CDATA_SECTION_NODE          = 4
    ENTITY_REFERENCE_NODE       = 5
    ENTITY_NODE                 = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE                = 8
    DOCUMENT_NODE               = 9
    DOCUMENT_TYPE_NODE          = 10
    DOCUMENT_FRAGMENT_NODE      = 11
    NOTATION_NODE               = 12

    # Debugging aids: when _debug is true every created node is recorded in
    # allnodes and a creation/deletion trace is written to the debug stream.
    allnodes = {}
    _debug = 0
    # When true, the tree-mutating methods maintain child.parentNode.
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        """Start with an empty child list; register the node when debugging."""
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Synthesize ``foo`` from ``_get_foo()`` and ``_get_foo`` from ``foo``.

        Only called when normal lookup fails.  The transient instance
        attribute ``inGetAttr`` marks a lookup in progress so that the nested
        getattr()/hasattr() calls below fail instead of recursing forever.
        """
        if key[0:2] == "__":
            raise AttributeError
        # getattr should never call getattr!
        if self.__dict__.has_key("inGetAttr"):
            del self.inGetAttr
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            if hasattr(self, attrname):
                del self.inGetAttr
                # Bind through default arguments so the getter keeps
                # working after this call returns.
                return (lambda self=self, attrname=attrname:
                                getattr(self, attrname))
            else:
                del self.inGetAttr
                raise AttributeError(key)
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        # A node is always true, even when it has no children.
        return 1

    def toxml(self):
        """Return the serialization produced by ``self.writexml`` as a string."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        # Raises IndexError when there are no children.
        return self.childNodes[0]

    def _get_lastChild(self):
        # Raises IndexError when there are no children.
        return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild and return newChild.

        A refChild of None appends newChild at the end.  Raises ValueError
        if refChild is not None and is not a child of this node.
        """
        if refChild is None:
            self.childNodes.append(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node to the child list and return it."""
        self.childNodes.append(node)
        # Keep parentNode in step with insertBefore().
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild where oldChild was and return oldChild.

        Raises ValueError if oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        return oldChild

    def removeChild(self, oldChild):
        """Remove oldChild from the child list and return it.

        Raises ValueError if oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def cloneNode(self, deep):
        """Return a copy of this node that shares no mutable state with it.

        The copy has no parent.  Attribute nodes held in ``_attrs`` /
        ``_attrsNS`` are always cloned; children are cloned recursively only
        when deep is true, otherwise the copy has no children.
        """
        import copy
        # copy.copy() yields a new instance with its own __dict__, so the
        # assignments below cannot leak back into the original node.
        clone = copy.copy(self)
        if self._makeParentNodes:
            clone.parentNode = None
        attrs = self.__dict__.get("_attrs")
        if attrs is not None:
            clone._attrs = {}
            clone._attrsNS = {}
            for attr in attrs.values():
                attr_clone = attr.cloneNode(1)
                clone._attrs[attr_clone.name] = attr_clone
                clone._attrsNS[(attr_clone.namespaceURI,
                                attr_clone.localName)] = attr_clone
        # NOTE(review): other subclass-specific references (for example a
        # Document's documentElement) are copied as-is and still point into
        # the original tree -- confirm whether callers clone such nodes.
        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                child_clone = child.cloneNode(1)
                if self._makeParentNodes:
                    child_clone.parentNode = clone
                clone.childNodes.append(child_clone)
        return clone

    def unlink(self):
        """Break the references held by this subtree.

        Clears parentNode, unlinks and drops every child, removes every
        attribute node and finally sets childNodes to None.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        if self.attributes:
            for attr in self._attrs.values():
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
 | |
| 
 | |
| def _write_data(writer, data):
 | |
|     "Writes datachars to writer."
 | |
|     data = string.replace(data, "&", "&")
 | |
|     data = string.replace(data, "<", "<")
 | |
|     data = string.replace(data, "\"", """)
 | |
|     data = string.replace(data, ">", ">")
 | |
|     writer.write(data)
 | |
| 
 | |
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect the element descendants of parent whose tagName is name.

    Walks parent.childNodes depth-first, appending each matching element to
    rc before descending into it.  A name of "*" matches every element.
    Returns rc.
    """
    wildcard = (name == "*")
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if wildcard or child.tagName == name:
                rc.append(child)
        # Every child is searched, whatever its node type.
        _getElementsByTagNameHelper(child, name, rc)
    return rc
 | |
| 
 | |
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect the element descendants of parent matching a namespaced name.

    Walks parent.childNodes depth-first and appends to rc every element
    whose localName equals localName and whose namespaceURI equals nsURI;
    "*" acts as a wildcard for either argument.  Returns rc.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare against localName, not the qualified tagName, so that
            # prefixed elements are found as well.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
 | |
| 
 | |
class Attr(Node):
    """An attribute node; ``value`` and ``nodeValue`` always mirror each other."""
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Write straight into the instance dictionary: this avoids the
        # overhead of going through __setattr__ for every field.
        state = self.__dict__
        state["localName"] = localName or qName
        state["nodeName"] = state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store the attribute, keeping ``value`` and ``nodeValue`` in sync."""
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            state["value"] = state["nodeValue"] = value
        else:
            state[name] = value
 | |
| 
 | |
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary"""
    def __init__(self, attrs, attrsNS):
        # attrs maps qualified name -> attribute node,
        # attrsNS maps (namespaceURI, localName) -> attribute node.
        self._attrs = attrs
        self._attrsNS = attrsNS
        # Snapshot of the size; refreshed by the mutators on this object.
        self.length = len(self._attrs)

    def item(self, index):
        """Return the attribute node at position index, or None if out of range."""
        try:
            return self[self.keys()[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.name, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the qualified attribute names."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self._attrsNS.keys()

    def values(self):
        """Return the attribute nodes."""
        return self._attrs.values()

    def __len__(self):
        # Read the live dictionary so the size stays right even when the
        # owning element is changed behind this object's back.
        return len(self._attrs)

    def __cmp__(self, other):
        # Two lists over the same underlying dictionary compare equal.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up an attribute node by name or by (namespaceURI, localName).

        Raises KeyError if there is no such attribute.
        """
        if type(attname_or_tuple) is types.TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store value under attname.

        value is either a string, which is wrapped in a new Attr named
        attname, or an existing Attr node, which is stored under its own
        name.  Any attribute previously stored under attname is unlinked and
        removed from both indexes.
        """
        if type(value) is types.StringType:
            node = Attr(attname)
            node.value = value
        else:
            assert isinstance(value, Attr)
            node = value
        old = self._attrs.get(attname, None)
        if old is not None and old is not node:
            old.unlink()
            # Drop the replaced node from both indexes so that no stale
            # entry survives when the new node uses different keys.
            del self._attrs[attname]
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        self.length = len(self._attrs)

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed attribute and remove it from both indexes."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        self.length = len(self._attrs)
 | |
| 
 | |
class Element( Node ):
    """An element node with a tag name, namespace information and attributes."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        # Without an explicit local name the tag name doubles as one.
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs={}  # attributes are double-indexed:
        self._attrsNS={}#    tagName -> Attribute
                #    URI,localName -> Attribute
                # in the future: consider lazy generation of attribute objects
                #                this is too tricky for now because of headaches
                #                with namespaces.

    def getAttribute(self, attname):
        """Return the value of the named attribute; KeyError if it is missing."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute; KeyError if missing."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute named attname with the given value and attach it."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute with the given value and attach it."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the attribute node named attrname, or None if it is missing."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced attribute node; KeyError if it is missing."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Attach attr, replacing any attribute stored under the same name.

        The replaced node is unlinked and removed from the namespace index
        too, so that no stale (namespaceURI, localName) entry is left behind
        when the new attribute lives in a different namespace.
        """
        old = self._attrs.get(attr.name, None)
        if old is not None and old is not attr:
            old.unlink()
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        """Remove the attribute called name; KeyError if it is missing."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; KeyError if it is missing."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and drop it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return a list of the descendant elements whose tagName is name."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of the descendant elements matching the namespaced name."""
        # The helper fills the list in place, so hand it our own list and
        # return that rather than depending on the helper's return value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer):
        """Write the element, its attributes and its children to writer.

        Attributes are written in sorted name order with escaped values; an
        element without children is written as an empty-element tag.
        """
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = attrs.keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        # Reached through Node.__getattr__ when ``element.attributes`` is read.
        return AttributeList(self._attrs, self._attrsNS)
 | |
| 
 | |
class Comment(Node):
    """A comment node; the text is held in both ``data`` and ``nodeValue``."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.nodeName = "#comment"
        self.attributes = None
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the comment to writer wrapped in ``<!--`` and ``-->``."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)
 | |
| 
 | |
class ProcessingInstruction(Node):
    """A processing instruction made of a target and a data string."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        # The target doubles as the node name, the data as the node value.
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the instruction to writer as ``<?target data?>``."""
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)
 | |
| 
 | |
class Text(Node):
    """A text node; the text is held in both ``data`` and ``nodeValue``."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.data = data
        self.nodeValue = data

    def __repr__(self):
        # Show at most the first ten characters, marking any truncation.
        preview = self.data[0:10]
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        return "<DOM Text node \"%s%s\">" % (preview, suffix)

    def writexml(self, writer):
        """Write the text to writer through ``_write_data``."""
        _write_data(writer, self.data)
 | |
| 
 | |
| def _nssplit(qualifiedName):
 | |
|     fields = qualifiedName.split(':', 1)
 | |
|     if len(fields) == 2:
 | |
|         return fields
 | |
|     elif len(fields) == 1:
 | |
|         return ('', fields[0])
 | |
| 
 | |
class Document(Node):
    """The root of a DOM tree; also the factory for the other node types."""
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append node and return it, remembering the first element child.

        Raises TypeError if node is an element and the document already has
        a document element.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    # The simple factory methods are the node classes themselves, so they
    # take exactly the constructor arguments of those classes.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for qualifiedName in namespaceURI."""
        prefix,localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for qualifiedName in namespaceURI."""
        prefix,localName = _nssplit(qualifiedName)
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of the elements in the document matching the namespaced name."""
        # The helper fills the list in place, so hand it our own list and
        # return that rather than depending on the helper's return value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Forget the document element, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return a list of the elements in the document whose tagName is name."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child node to writer, in order."""
        for node in self.childNodes:
            node.writexml(writer)
 | |
| 
 | |
| def _doparse(func, args, kwargs):
 | |
|     events = apply(func, args, kwargs)
 | |
|     toktype, rootNode = events.getEvent()
 | |
|     events.expandNode(rootNode)
 | |
|     return rootNode
 | |
| 
 | |
def parse(*args, **kwargs):
    """Build a DOM tree from a file.

    All arguments are forwarded unchanged to ``pulldom.parse``.
    """
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
 | |
| 
 | |
def parseString(*args, **kwargs):
    """Build a DOM tree from a string of XML.

    All arguments are forwarded unchanged to ``pulldom.parseString``.
    """
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)
 | |
| 
 |