| 
									
										
										
										
											2024-05-08 15:34:40 -04:00
										 |  |  | :mod:`!xml.dom.pulldom` --- Support for building partial DOM trees
 | 
					
						
							|  |  |  | ==================================================================
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | .. module:: xml.dom.pulldom
 | 
					
						
							|  |  |  |    :synopsis: Support for building partial DOM trees from SAX events.
 | 
					
						
							| 
									
										
										
										
											2016-06-11 15:02:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | .. moduleauthor:: Paul Prescod <paul@prescod.net>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-02-10 08:09:36 +00:00
										 |  |  | **Source code:** :source:`Lib/xml/dom/pulldom.py`
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | --------------
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  | The :mod:`xml.dom.pulldom` module provides a "pull parser" which can also be
 | 
					
						
							|  |  |  | asked to produce DOM-accessible fragments of the document where necessary. The
 | 
					
						
							|  |  |  | basic concept involves pulling "events" from a stream of incoming XML and
 | 
					
						
							|  |  |  | processing them. In contrast to SAX, which also employs an event-driven
 | 
					
						
							|  |  |  | processing model together with callbacks, the user of a pull parser is
 | 
					
						
							|  |  |  | responsible for explicitly pulling events from the stream, looping over those
 | 
					
						
							|  |  |  | events until either processing is finished or an error condition occurs.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-03-26 17:35:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-07-07 11:03:07 +02:00
										 |  |  | .. note::
 | 
					
						
							| 
									
										
										
										
											2013-03-26 17:35:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-07-07 11:03:07 +02:00
										 |  |  |    If you need to parse untrusted or unauthenticated data, see
 | 
					
						
							|  |  |  |    :ref:`xml-security`.
 | 
					
						
							| 
									
										
										
										
											2013-03-26 17:35:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-19 15:29:04 +02:00
										 |  |  | .. versionchanged:: 3.7.1
 | 
					
						
							| 
									
										
										
										
											2018-09-23 09:50:25 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |    The SAX parser no longer processes general external entities by default to
 | 
					
						
							|  |  |  |    increase security. To enable processing of external entities,
 | 
					
						
							|  |  |  |    pass a custom parser instance in::
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       from xml.dom.pulldom import parse
 | 
					
						
							|  |  |  |       from xml.sax import make_parser
 | 
					
						
							|  |  |  |       from xml.sax.handler import feature_external_ges
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       parser = make_parser()
 | 
					
						
							|  |  |  |       parser.setFeature(feature_external_ges, True)
 | 
					
						
							|  |  |  |       parse(filename, parser=parser)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-03-26 17:35:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  | Example::
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |    from xml.dom import pulldom
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |    doc = pulldom.parse('sales_items.xml')
 | 
					
						
							|  |  |  |    for event, node in doc:
 | 
					
						
							|  |  |  |        if event == pulldom.START_ELEMENT and node.tagName == 'item':
 | 
					
						
							|  |  |  |            if int(node.getAttribute('price')) > 50:
 | 
					
						
							|  |  |  |                doc.expandNode(node)
 | 
					
						
							|  |  |  |                print(node.toxml())
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  | ``event`` is a constant and can be one of:
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | * :data:`START_ELEMENT`
 | 
					
						
							|  |  |  | * :data:`END_ELEMENT`
 | 
					
						
							|  |  |  | * :data:`COMMENT`
 | 
					
						
							|  |  |  | * :data:`START_DOCUMENT`
 | 
					
						
							|  |  |  | * :data:`END_DOCUMENT`
 | 
					
						
							|  |  |  | * :data:`CHARACTERS`
 | 
					
						
							|  |  |  | * :data:`PROCESSING_INSTRUCTION`
 | 
					
						
							|  |  |  | * :data:`IGNORABLE_WHITESPACE`
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-02 03:37:02 +00:00
										 |  |  | ``node`` is an object of type :class:`xml.dom.minidom.Document`,
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  | :class:`xml.dom.minidom.Element` or :class:`xml.dom.minidom.Text`.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Since the document is treated as a "flat" stream of events, the document "tree"
 | 
					
						
							|  |  |  | is implicitly traversed and the desired elements are found regardless of their
 | 
					
						
							| 
									
										
										
										
											2012-03-16 16:49:58 +02:00
										 |  |  | depth in the tree. In other words, one does not need to consider hierarchical
 | 
					
						
							|  |  |  | issues such as recursive searching of the document nodes, although if the
 | 
					
						
							|  |  |  | context of elements were important, one would either need to maintain some
 | 
					
						
							|  |  |  | context-related state (i.e. remembering where one is in the document at any
 | 
					
						
							|  |  |  | given point) or to make use of the :meth:`DOMEventStream.expandNode` method
 | 
					
						
							|  |  |  | and switch to DOM-related processing.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  | .. class:: PullDom(documentFactory=None)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    Subclass of :class:`xml.sax.handler.ContentHandler`.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-16 15:58:14 +00:00
										 |  |  | .. class:: SAX2DOM(documentFactory=None)
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |    Subclass of :class:`xml.sax.handler.ContentHandler`.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-16 15:58:14 +00:00
										 |  |  | .. function:: parse(stream_or_string, parser=None, bufsize=None)
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |    Return a :class:`DOMEventStream` from the given input. *stream_or_string* may be
 | 
					
						
							| 
									
										
										
										
											2016-08-20 08:03:06 +00:00
										 |  |  |    either a file name or a file-like object. *parser*, if given, must be an
 | 
					
						
							| 
									
										
										
										
											2013-08-29 10:28:44 +03:00
										 |  |  |    :class:`~xml.sax.xmlreader.XMLReader` object. This function will change the
 | 
					
						
							|  |  |  |    document handler of the
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |    parser and activate namespace support; other parser configuration (like
 | 
					
						
							|  |  |  |    setting an entity resolver) must have been done in advance.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If you have XML in a string, you can use the :func:`parseString` function instead:
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-16 15:58:14 +00:00
										 |  |  | .. function:: parseString(string, parser=None)
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 16:49:58 +02:00
										 |  |  |    Return a :class:`DOMEventStream` that represents the (Unicode) *string*.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | .. data:: default_bufsize
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    Default value for the *bufsize* parameter to :func:`parse`.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-01 13:51:09 +00:00
										 |  |  |    The value of this variable can be changed before calling :func:`parse` and
 | 
					
						
							|  |  |  |    the new value will take effect.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | .. _domeventstream-objects:
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | DOMEventStream Objects
 | 
					
						
							|  |  |  | ----------------------
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  | .. class:: DOMEventStream(stream, parser, bufsize)
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-08 13:07:40 +03:00
										 |  |  |    .. versionchanged:: 3.11
 | 
					
						
							| 
									
										
										
										
											2023-10-19 16:05:05 +01:00
										 |  |  |       Support for :meth:`~object.__getitem__` method has been removed.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 16:49:58 +02:00
										 |  |  |    .. method:: getEvent()
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |       Return a tuple containing *event* and the current *node* as
 | 
					
						
							| 
									
										
										
										
											2012-03-16 16:49:58 +02:00
										 |  |  |       :class:`xml.dom.minidom.Document` if event equals :data:`START_DOCUMENT`,
 | 
					
						
							|  |  |  |       :class:`xml.dom.minidom.Element` if event equals :data:`START_ELEMENT` or
 | 
					
						
							|  |  |  |       :data:`END_ELEMENT` or :class:`xml.dom.minidom.Text` if event equals
 | 
					
						
							|  |  |  |       :data:`CHARACTERS`.
 | 
					
						
							| 
									
										
										
										
											2017-05-19 14:37:57 -06:00
										 |  |  |       The current node does not contain information about its children, unless
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |       :meth:`expandNode` is called.
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 16:49:58 +02:00
										 |  |  |    .. method:: expandNode(node)
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |       Expands all children of *node* into *node*. Example::
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-30 16:28:43 +03:00
										 |  |  |           from xml.dom import pulldom
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |           xml = '<html><title>Foo</title> <p>Some text <div>and more</div></p> </html>'
 | 
					
						
							|  |  |  |           doc = pulldom.parseString(xml)
 | 
					
						
							|  |  |  |           for event, node in doc:
 | 
					
						
							|  |  |  |               if event == pulldom.START_ELEMENT and node.tagName == 'p':
 | 
					
						
							|  |  |  |                   # Following statement only prints '<p/>'
 | 
					
						
							|  |  |  |                   print(node.toxml())
 | 
					
						
							| 
									
										
										
										
											2016-03-30 16:28:43 +03:00
										 |  |  |                   doc.expandNode(node)
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |                   # Following statement prints node with all its children '<p>Some text <div>and more</div></p>'
 | 
					
						
							|  |  |  |                   print(node.toxml())
 | 
					
						
							| 
									
										
										
										
											2007-08-15 14:28:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-03-16 14:37:14 +02:00
										 |  |  |    .. method:: DOMEventStream.reset()
 |