mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			454 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			454 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pulldom
 | |
| import string
 | |
| from StringIO import StringIO
 | |
| import types
 | |
| 
 | |
| """
 | |
| minidom.py -- a lightweight DOM implementation based on SAX.
 | |
| 
 | |
| parse( "foo.xml" )
 | |
| 
 | |
| parseString( "<foo><bar/></foo>" )
 | |
| 
 | |
| Todo:
 | |
| =====
 | |
|  * convenience methods for getting elements and text.
 | |
|  * more testing
 | |
|  * bring some of the writer and linearizer code into conformance with this
 | |
|         interface
 | |
|  * SAX 2 namespaces
 | |
| """
 | |
| 
 | |
class Node:
    """Base class for the DOM node types defined in this module.

    Besides the node-type constants and child-list management, the class
    implements a small attribute protocol in __getattr__:

      * reading ``node.foo`` when ``foo`` is not a real attribute calls
        ``node._get_foo()`` and returns its result;
      * reading ``node._get_foo`` when no such method exists returns a
        zero-argument callable that fetches the real attribute ``foo``.
    """
    ELEMENT_NODE                = 1
    ATTRIBUTE_NODE              = 2
    TEXT_NODE                   = 3
    CDATA_SECTION_NODE          = 4
    ENTITY_REFERENCE_NODE       = 5
    ENTITY_NODE                 = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE                = 8
    DOCUMENT_NODE               = 9
    DOCUMENT_TYPE_NODE          = 10
    DOCUMENT_FRAGMENT_NODE      = 11
    NOTATION_NODE               = 12

    # Registry of live nodes, only maintained while _debug is true.
    allnodes = {}
    _debug = 0
    # When true, the child-insertion methods set parentNode on the child.
    _makeParentNodes = 1
    # Lazily created log stream used while _debug is true.
    debug = None

    def __init__(self):
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` <-> ``_get_foo`` lookups for missing attributes.

        Raises AttributeError (carrying the requested name) when neither
        form can be satisfied.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  The "inGetAttr" marker in the
        # instance dict tells a nested invocation to give up immediately.
        if "inGetAttr" in state:
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        state["inGetAttr"] = 1
        try:
            if prefix == "_get_":
                if not hasattr(self, attrname):
                    raise AttributeError(key)
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
        finally:
            # A nested lookup may already have consumed the marker, so only
            # remove it when it is still there.
            if "inGetAttr" in state:
                del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        return 1

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert newChild in front of refChild and return newChild.

        A refChild of None appends newChild.  Raises ValueError when
        refChild is not one of this node's children.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node to the child list and return it."""
        self.childNodes.append(node)
        # Keep parent bookkeeping consistent with insertBefore().
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's position and return oldChild.

        Raises ValueError when oldChild is not one of this node's children.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        return oldChild

    def removeChild(self, oldChild):
        """Remove oldChild from the child list and return it.

        Raises ValueError when oldChild is not one of this node's children.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def cloneNode(self, deep):
        """Return a copy of this node that has no parent.

        With a true `deep` the children are cloned recursively; otherwise
        the copy starts with an empty child list.  When the node carries
        the _attrs/_attrsNS indexes, its attribute nodes are cloned too so
        the copy never shares mutable state with the original.
        """
        import copy
        # copy.copy gives the clone its own instance dict; the mutable
        # members are replaced below.
        clone = copy.copy(self)
        clone.parentNode = None
        clone.childNodes = []

        if "_attrs" in self.__dict__:
            clone._attrs = {}
            clone._attrsNS = {}
            for attr in self._attrs.values():
                attr_clone = attr.cloneNode(deep)
                clone._attrs[attr_clone.name] = attr_clone
                clone._attrsNS[(attr_clone.namespaceURI,
                                attr_clone.localName)] = attr_clone

        if deep:
            for child in self.childNodes or []:
                child_clone = child.cloneNode(deep)
                if self._makeParentNodes:
                    child_clone.parentNode = clone
                clone.childNodes.append(child_clone)

        # A copied documentElement reference would still point into the
        # original tree; re-point it at the clone's own element child.
        if "documentElement" in clone.__dict__:
            clone.documentElement = None
            for child in clone.childNodes:
                if child.nodeType == Node.ELEMENT_NODE:
                    clone.documentElement = child
                    break
        return clone

    def unlink(self):
        """Break the references held by this subtree.

        Clears parentNode, unlinks and drops every child, sets childNodes
        to None and removes all attribute nodes.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        if getattr(self, "attributes", None):
            # Iterate over a snapshot: removeAttributeNode mutates _attrs.
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            if Node.debug is not None:
                Node.debug.write("Deleting: %s\n" % index)
            # Nodes made by cloneNode() or unlinked twice are not registered.
            if index in Node.allnodes:
                del Node.allnodes[index]
 | |
| 
 | |
| def _write_data( writer, data):
 | |
|     "Writes datachars to writer."
 | |
|     data=string.replace(data,"&","&")
 | |
|     data=string.replace(data,"<","<")
 | |
|     data=string.replace(data,"\"",""")
 | |
|     data=string.replace(data,">",">")
 | |
|     writer.write(data)
 | |
| 
 | |
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect matching element descendants of `parent` into `rc`.

    Walks the tree depth-first in document order.  An element matches when
    `name` is "*" or equals its tagName.  The list `rc` is extended in
    place and also returned.
    """
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if name == "*" or child.tagName == name:
                rc.append(child)
        # Every child is descended into, whatever its node type.
        _getElementsByTagNameHelper(child, name, rc)
    return rc
 | |
| 
 | |
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect namespace-qualified element matches below `parent` into `rc`.

    An element matches when its localName equals `localName` and its
    namespaceURI equals `nsURI`; "*" acts as a wildcard for either.  Only
    element children are descended into.  The list `rc` is extended in
    place and also returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if ((localName == "*" or node.localName == localName) and
                    (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
 | |
| 
 | |
class Attr(Node):
    """An attribute node; `value` and `nodeValue` always hold the same object."""
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None,
                 prefix=None):
        # Fill the instance dict directly instead of going through
        # __setattr__ (done for performance).
        state = self.__dict__
        state["localName"] = localName or qName
        state["nodeName"] = qName
        state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        state["attributes"] = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            # Writing either alias updates both.
            state["value"] = value
            state["nodeValue"] = value
        else:
            state[name] = value
 | |
| 
 | |
class AttributeList:
    """the attribute list is a transient interface to the underlying
dictionaries.  mutations here will change the underlying element's
dictionary

    `attrs` maps attribute name -> attribute node and `attrsNS` maps
    (namespaceURI, localName) -> the same attribute nodes.  Both `length`
    and len() are computed from the live dictionary on every access.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        # `length` is derived on demand so it cannot go stale when the
        # underlying dictionary changes.
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the attribute node at position `index`, or None if out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        return list(self._attrs.keys())

    def keysNS(self):
        return list(self._attrsNS.keys())

    def values(self):
        return list(self._attrs.values())

    def __len__(self):
        return len(self._attrs)

    def __eq__(self, other):
        # Two lists are equal when they wrap the very same dictionary.
        return self._attrs is getattr(other, "_attrs", None)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __cmp__(self, other):
        if self.__eq__(other):
            return 0
        mine, theirs = id(self), id(other)
        return (mine > theirs) - (mine < theirs)

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up by name, or by a (namespaceURI, localName) tuple."""
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store an attribute given either a string value or an Attr node.

        Raises TypeError for any other kind of value.
        """
        if isinstance(value, str):
            node = Attr(attname)
            node.value = value
        elif isinstance(value, Attr):
            node = value
        else:
            raise TypeError("attribute value must be a string or an Attr")
        old = self._attrs.get(node.name)
        if old is not None and old is not node:
            # Drop the replaced node's namespace entry as well, so the two
            # indexes keep describing the same set of nodes.
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
 | |
|  
 | |
class Element(Node):
    """An element node with a tag name, children and attribute nodes."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}    # attributes are double-indexed:
        self._attrsNS = {}  #    tagName -> Attribute
                #    URI,localName -> Attribute
                # in the future: consider lazy generation of attribute objects
                #                this is too tricky for now because of headaches
                #                with namespaces.

    def getAttribute(self, attname):
        """Return the value of attribute `attname`, or "" when it is absent."""
        attr = self._attrs.get(attname)
        if attr is None:
            return ""
        return attr.value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute, or "" when absent."""
        attr = self._attrsNS.get((namespaceURI, localName))
        if attr is None:
            return ""
        return attr.value

    def setAttribute(self, attname, value):
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the attribute node named `attrname`, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced attribute node, or None."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Store `attr`, unlinking any different node it replaces."""
        old = self._attrs.get(attr.name, None)
        if old is not None and old is not attr:
            # Remove the replaced node from the namespace index too; its
            # key can differ from the new node's.
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        """Remove the attribute `name`; raises KeyError when it is absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; raises KeyError when absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        # The result list is owned here, so the return value does not
        # depend on what the helper itself returns.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        """Write this element, its attributes (sorted by name) and children."""
        writer.write("<" + self.tagName)

        a_names = list(self._attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" " + a_name + "=\"")
            # The index holds attribute nodes; the text to escape is .value.
            _write_data(writer, self._attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        return AttributeList(self._attrs, self._attrsNS)
 | |
| 
 | |
class Comment(Node):
    """A comment node; `data` and `nodeValue` both hold the comment text."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer):
        # The text is written verbatim between the comment delimiters.
        markup = "<!--" + self.data + "-->"
        writer.write(markup)
 | |
| 
 | |
class ProcessingInstruction(Node):
    """A processing instruction node made of a target and its data."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def writexml(self, writer):
        # Target and data are separated by a single space.
        markup = "<?" + self.target + " " + self.data + "?>"
        writer.write(markup)
 | |
| 
 | |
class Text(Node):
    """A text node; `data` and `nodeValue` both hold the character data."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        # Show at most the first ten characters, flagging any truncation.
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        preview = self.data[0:10]
        return "<DOM Text node \"" + preview + suffix + "\">"

    def writexml(self, writer):
        # Escaping is delegated to the module-level helper.
        _write_data(writer, self.data)
 | |
| 
 | |
| def _nssplit( qualifiedName ):
 | |
|     fields = string.split(qualifiedName, ':')
 | |
|     if len(fields) == 2:
 | |
|         return fields
 | |
|     elif len(fields) == 1:
 | |
|         return( '', fields[0] )
 | |
| 
 | |
class Document(Node):
    """The root of a DOM tree; it holds at most one element child."""
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append node and return it, recording an element child as documentElement.

        Raises TypeError when a second element child is appended.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement is not None:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    # The node classes double as the factory callables.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        # Attr takes the qualified name first, then the namespace URI.
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        # The result list is owned here, so the return value does not
        # depend on what the helper itself returns.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        for node in self.childNodes:
            node.writexml(writer)
 | |
| 
 | |
| def _doparse( func, args, kwargs ):
 | |
|     events=apply( func, args, kwargs )
 | |
|     (toktype, rootNode)=events.getEvent()
 | |
|     events.expandNode( rootNode )
 | |
|     return rootNode
 | |
| 
 | |
def parse(*args, **kwargs):
    "Build a DOM from a filename or file object; arguments go to pulldom.parse"
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
 | |
| 
 | |
def parseString(*args, **kwargs):
    "Build a DOM from XML held in a string; arguments go to pulldom.parseString"
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)
 | |
| 
 | 
