| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | #! /usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | """Perform massive transformations on a document tree created from the LaTeX
 | 
					
						
							|  |  |  | of the Python documentation, and dump the ESIS data for the transformed tree. | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | """
 | 
					
						
							|  |  |  | __version__ = '$Revision$' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import errno | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  | import esistools | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | import string | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import xml.dom.core | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | from xml.dom.core import \ | 
					
						
							|  |  |  |      ELEMENT, \ | 
					
						
							|  |  |  |      TEXT | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-14 19:45:38 +00:00
										 |  |  | class ConversionError(Exception): | 
					
						
							|  |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  | ewrite = sys.stderr.write | 
					
						
							|  |  |  | try: | 
					
						
							|  |  |  |     # We can only do this trick on Unix (if tput is on $PATH)! | 
					
						
							|  |  |  |     if sys.platform != "posix" or not sys.stderr.isatty(): | 
					
						
							|  |  |  |         raise ImportError | 
					
						
							|  |  |  |     import curses | 
					
						
							|  |  |  |     import commands | 
					
						
							|  |  |  | except ImportError: | 
					
						
							|  |  |  |     bwrite = ewrite | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     def bwrite(s, BOLDON=commands.getoutput("tput bold"), | 
					
						
							|  |  |  |                BOLDOFF=commands.getoutput("tput sgr0")): | 
					
						
							|  |  |  |         ewrite("%s%s%s" % (BOLDON, s, BOLDOFF)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  | PARA_ELEMENT = "para" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | DEBUG_PARA_FIXER = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | if DEBUG_PARA_FIXER: | 
					
						
							|  |  |  |     def para_msg(s): | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |         ewrite("*** %s\n" % s) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | else: | 
					
						
							|  |  |  |     def para_msg(s): | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | # Workaround to deal with invalid documents (multiple root elements).  This | 
					
						
							|  |  |  | # does not indicate a bug in the DOM implementation. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def get_documentElement(doc): | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     docelem = None | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     for n in doc.childNodes: | 
					
						
							|  |  |  |         if n.nodeType == ELEMENT: | 
					
						
							|  |  |  |             docelem = n | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     return docelem | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | xml.dom.core.Document.get_documentElement = get_documentElement | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Replace get_childNodes for the Document class; without this, children | 
					
						
							|  |  |  | # accessed from the Document object via .childNodes (no matter how many | 
					
						
							|  |  |  | # levels of access are used) will be given an ownerDocument of None. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def get_childNodes(doc): | 
					
						
							|  |  |  |     return xml.dom.core.NodeList(doc._node.children, doc._node) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | xml.dom.core.Document.get_childNodes = get_childNodes | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_first_element(doc, gi): | 
					
						
							|  |  |  |     for n in doc.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if n.nodeType == ELEMENT and n.tagName == gi: | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |             return n | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def extract_first_element(doc, gi): | 
					
						
							|  |  |  |     node = get_first_element(doc, gi) | 
					
						
							|  |  |  |     if node is not None: | 
					
						
							|  |  |  |         doc.removeChild(node) | 
					
						
							|  |  |  |     return node | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | def find_all_elements(doc, gi): | 
					
						
							|  |  |  |     nodes = [] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     if doc.nodeType == ELEMENT and doc.tagName == gi: | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         nodes.append(doc) | 
					
						
							|  |  |  |     for child in doc.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |             if child.tagName == gi: | 
					
						
							|  |  |  |                 nodes.append(child) | 
					
						
							|  |  |  |             for node in child.getElementsByTagName(gi): | 
					
						
							|  |  |  |                 nodes.append(node) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     return nodes | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  | def find_all_child_elements(doc, gi): | 
					
						
							|  |  |  |     nodes = [] | 
					
						
							|  |  |  |     for child in doc.childNodes: | 
					
						
							|  |  |  |         if child.nodeType == ELEMENT: | 
					
						
							|  |  |  |             if child.tagName == gi: | 
					
						
							|  |  |  |                 nodes.append(child) | 
					
						
							|  |  |  |     return nodes | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def find_all_elements_from_set(doc, gi_set): | 
					
						
							|  |  |  |     return __find_all_elements_from_set(doc, gi_set, []) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def __find_all_elements_from_set(doc, gi_set, nodes): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     if doc.nodeType == ELEMENT and doc.tagName in gi_set: | 
					
						
							|  |  |  |         nodes.append(doc) | 
					
						
							|  |  |  |     for child in doc.childNodes: | 
					
						
							|  |  |  |         if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |             __find_all_elements_from_set(child, gi_set, nodes) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     return nodes | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def simplify(doc, fragment): | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     # Try to rationalize the document a bit, since these things are simply | 
					
						
							|  |  |  |     # not valid SGML/XML documents as they stand, and need a little work. | 
					
						
							|  |  |  |     documentclass = "document" | 
					
						
							|  |  |  |     inputs = [] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     node = extract_first_element(fragment, "documentclass") | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     if node is not None: | 
					
						
							|  |  |  |         documentclass = node.getAttribute("classname") | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     node = extract_first_element(fragment, "title") | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     if node is not None: | 
					
						
							|  |  |  |         inputs.append(node) | 
					
						
							|  |  |  |     # update the name of the root element | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     node = get_first_element(fragment, "document") | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     if node is not None: | 
					
						
							|  |  |  |         node._node.name = documentclass | 
					
						
							|  |  |  |     while 1: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         node = extract_first_element(fragment, "input") | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |         if node is None: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         inputs.append(node) | 
					
						
							|  |  |  |     if inputs: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         docelem = get_documentElement(fragment) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |         inputs.reverse() | 
					
						
							|  |  |  |         for node in inputs: | 
					
						
							|  |  |  |             text = doc.createTextNode("\n") | 
					
						
							|  |  |  |             docelem.insertBefore(text, docelem.firstChild) | 
					
						
							|  |  |  |             docelem.insertBefore(node, text) | 
					
						
							|  |  |  |         docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     while fragment.firstChild and fragment.firstChild.nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         fragment.removeChild(fragment.firstChild) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def cleanup_root_text(doc): | 
					
						
							|  |  |  |     discards = [] | 
					
						
							|  |  |  |     skip = 0 | 
					
						
							|  |  |  |     for n in doc.childNodes: | 
					
						
							|  |  |  |         prevskip = skip | 
					
						
							|  |  |  |         skip = 0 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if n.nodeType == TEXT and not prevskip: | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |             discards.append(n) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         elif n.nodeType == ELEMENT and n.tagName == "COMMENT": | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |             skip = 1 | 
					
						
							|  |  |  |     for node in discards: | 
					
						
							|  |  |  |         doc.removeChild(node) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  | DESCRIPTOR_ELEMENTS = ( | 
					
						
							|  |  |  |     "cfuncdesc", "cvardesc", "ctypedesc", | 
					
						
							|  |  |  |     "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni", | 
					
						
							|  |  |  |     "excdesc", "funcdesc", "funcdescni", "opcodedesc", | 
					
						
							|  |  |  |     "datadesc", "datadescni", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def fixup_descriptors(doc, fragment): | 
					
						
							|  |  |  |     sections = find_all_elements(fragment, "section") | 
					
						
							| 
									
										
										
										
											1999-01-29 21:31:12 +00:00
										 |  |  |     for section in sections: | 
					
						
							|  |  |  |         find_and_fix_descriptors(doc, section) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def find_and_fix_descriptors(doc, container): | 
					
						
							|  |  |  |     children = container.childNodes | 
					
						
							|  |  |  |     for child in children: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-29 21:31:12 +00:00
										 |  |  |             tagName = child.tagName | 
					
						
							|  |  |  |             if tagName in DESCRIPTOR_ELEMENTS: | 
					
						
							|  |  |  |                 rewrite_descriptor(doc, child) | 
					
						
							|  |  |  |             elif tagName == "subsection": | 
					
						
							|  |  |  |                 find_and_fix_descriptors(doc, child) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def rewrite_descriptor(doc, descriptor): | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Do these things: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |     #   1. Add an "index='no'" attribute to the element if the tagName | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     #      ends in 'ni', removing the 'ni' from the name. | 
					
						
							|  |  |  |     #   2. Create a <signature> from the name attribute and <args>. | 
					
						
							|  |  |  |     #   3. Create additional <signature>s from <*line{,ni}> elements, | 
					
						
							|  |  |  |     #      if found. | 
					
						
							| 
									
										
										
										
											1999-01-29 22:12:29 +00:00
										 |  |  |     #   4. If a <versionadded> is found, move it to an attribute on the | 
					
						
							|  |  |  |     #      descriptor. | 
					
						
							|  |  |  |     #   5. Move remaining child nodes to a <description> element. | 
					
						
							|  |  |  |     #   6. Put it back together. | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     # | 
					
						
							|  |  |  |     descname = descriptor.tagName | 
					
						
							|  |  |  |     index = 1 | 
					
						
							|  |  |  |     if descname[-2:] == "ni": | 
					
						
							|  |  |  |         descname = descname[:-2] | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |         descriptor.setAttribute("index", "no") | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |         descriptor._node.name = descname | 
					
						
							|  |  |  |         index = 0 | 
					
						
							|  |  |  |     desctype = descname[:-4] # remove 'desc' | 
					
						
							|  |  |  |     linename = desctype + "line" | 
					
						
							|  |  |  |     if not index: | 
					
						
							|  |  |  |         linename = linename + "ni" | 
					
						
							|  |  |  |     # 2. | 
					
						
							|  |  |  |     signature = doc.createElement("signature") | 
					
						
							|  |  |  |     name = doc.createElement("name") | 
					
						
							|  |  |  |     signature.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |     signature.appendChild(name) | 
					
						
							|  |  |  |     name.appendChild(doc.createTextNode(descriptor.getAttribute("name"))) | 
					
						
							|  |  |  |     descriptor.removeAttribute("name") | 
					
						
							|  |  |  |     if descriptor.attributes.has_key("var"): | 
					
						
							|  |  |  |         variable = descriptor.getAttribute("var") | 
					
						
							|  |  |  |         if variable: | 
					
						
							|  |  |  |             args = doc.createElement("args") | 
					
						
							|  |  |  |             args.appendChild(doc.createTextNode(variable)) | 
					
						
							|  |  |  |             signature.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |             signature.appendChild(args) | 
					
						
							|  |  |  |         descriptor.removeAttribute("var") | 
					
						
							|  |  |  |     newchildren = [signature] | 
					
						
							|  |  |  |     children = descriptor.childNodes | 
					
						
							|  |  |  |     pos = skip_leading_nodes(children, 0) | 
					
						
							|  |  |  |     if pos < len(children): | 
					
						
							|  |  |  |         child = children[pos] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == ELEMENT and child.tagName == "args": | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |             # create an <args> in <signature>: | 
					
						
							|  |  |  |             args = doc.createElement("args") | 
					
						
							|  |  |  |             argchildren = [] | 
					
						
							|  |  |  |             map(argchildren.append, child.childNodes) | 
					
						
							|  |  |  |             for n in argchildren: | 
					
						
							|  |  |  |                 child.removeChild(n) | 
					
						
							|  |  |  |                 args.appendChild(n) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |             signature.appendChild(doc.createTextNode("\n    ")) | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |             signature.appendChild(args) | 
					
						
							|  |  |  |     signature.appendChild(doc.createTextNode("\n  ")) | 
					
						
							| 
									
										
										
										
											1999-01-29 22:12:29 +00:00
										 |  |  |     # 3, 4. | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     pos = skip_leading_nodes(children, pos + 1) | 
					
						
							|  |  |  |     while pos < len(children) \ | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |           and children[pos].nodeType == ELEMENT \ | 
					
						
							| 
									
										
										
										
											1999-01-29 22:12:29 +00:00
										 |  |  |           and children[pos].tagName in (linename, "versionadded"): | 
					
						
							|  |  |  |         if children[pos].tagName == linename: | 
					
						
							|  |  |  |             # this is really a supplemental signature, create <signature> | 
					
						
							|  |  |  |             sig = methodline_to_signature(doc, children[pos]) | 
					
						
							|  |  |  |             newchildren.append(sig) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # <versionadded added=...> | 
					
						
							|  |  |  |             descriptor.setAttribute( | 
					
						
							|  |  |  |                 "added", children[pos].getAttribute("version")) | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |         pos = skip_leading_nodes(children, pos + 1) | 
					
						
							| 
									
										
										
										
											1999-01-29 22:12:29 +00:00
										 |  |  |     # 5. | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     description = doc.createElement("description") | 
					
						
							|  |  |  |     description.appendChild(doc.createTextNode("\n")) | 
					
						
							|  |  |  |     newchildren.append(description) | 
					
						
							|  |  |  |     move_children(descriptor, description, pos) | 
					
						
							|  |  |  |     last = description.childNodes[-1] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     if last.nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |         last.data = string.rstrip(last.data) + "\n  " | 
					
						
							| 
									
										
										
										
											1999-01-29 22:12:29 +00:00
										 |  |  |     # 6. | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     # should have nothing but whitespace and signature lines in <descriptor>; | 
					
						
							|  |  |  |     # discard them | 
					
						
							|  |  |  |     while descriptor.childNodes: | 
					
						
							|  |  |  |         descriptor.removeChild(descriptor.childNodes[0]) | 
					
						
							|  |  |  |     for node in newchildren: | 
					
						
							|  |  |  |         descriptor.appendChild(doc.createTextNode("\n  ")) | 
					
						
							|  |  |  |         descriptor.appendChild(node) | 
					
						
							|  |  |  |     descriptor.appendChild(doc.createTextNode("\n")) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def methodline_to_signature(doc, methodline): | 
					
						
							|  |  |  |     signature = doc.createElement("signature") | 
					
						
							|  |  |  |     signature.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |     name = doc.createElement("name") | 
					
						
							|  |  |  |     name.appendChild(doc.createTextNode(methodline.getAttribute("name"))) | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     methodline.removeAttribute("name") | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     signature.appendChild(name) | 
					
						
							|  |  |  |     if len(methodline.childNodes): | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |         args = doc.createElement("args") | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         signature.appendChild(doc.createTextNode("\n    ")) | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |         signature.appendChild(args) | 
					
						
							|  |  |  |         move_children(methodline, args) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     signature.appendChild(doc.createTextNode("\n  ")) | 
					
						
							|  |  |  |     return signature | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  | def move_children(origin, dest, start=0): | 
					
						
							|  |  |  |     children = origin.childNodes | 
					
						
							|  |  |  |     while start < len(children): | 
					
						
							|  |  |  |         node = children[start] | 
					
						
							|  |  |  |         origin.removeChild(node) | 
					
						
							|  |  |  |         dest.appendChild(node) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def handle_appendix(doc, fragment): | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |     # must be called after simplfy() if document is multi-rooted to begin with | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     docelem = get_documentElement(fragment) | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |     toplevel = docelem.tagName == "manual" and "chapter" or "section" | 
					
						
							|  |  |  |     appendices = 0 | 
					
						
							|  |  |  |     nodes = [] | 
					
						
							|  |  |  |     for node in docelem.childNodes: | 
					
						
							|  |  |  |         if appendices: | 
					
						
							|  |  |  |             nodes.append(node) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         elif node.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |             appnodes = node.getElementsByTagName("appendix") | 
					
						
							|  |  |  |             if appnodes: | 
					
						
							|  |  |  |                 appendices = 1 | 
					
						
							|  |  |  |                 parent = appnodes[0].parentNode | 
					
						
							|  |  |  |                 parent.removeChild(appnodes[0]) | 
					
						
							|  |  |  |                 parent.normalize() | 
					
						
							|  |  |  |     if nodes: | 
					
						
							|  |  |  |         map(docelem.removeChild, nodes) | 
					
						
							|  |  |  |         docelem.appendChild(doc.createTextNode("\n\n\n")) | 
					
						
							|  |  |  |         back = doc.createElement("back-matter") | 
					
						
							|  |  |  |         docelem.appendChild(back) | 
					
						
							|  |  |  |         back.appendChild(doc.createTextNode("\n")) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         while nodes and nodes[0].nodeType == TEXT \ | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |               and not string.strip(nodes[0].data): | 
					
						
							|  |  |  |             del nodes[0] | 
					
						
							|  |  |  |         map(back.appendChild, nodes) | 
					
						
							|  |  |  |         docelem.appendChild(doc.createTextNode("\n")) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  | def handle_labels(doc, fragment): | 
					
						
							|  |  |  |     for label in find_all_elements(fragment, "label"): | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         id = label.getAttribute("id") | 
					
						
							|  |  |  |         if not id: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         parent = label.parentNode | 
					
						
							|  |  |  |         if parent.tagName == "title": | 
					
						
							|  |  |  |             parent.parentNode.setAttribute("id", id) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             parent.setAttribute("id", id) | 
					
						
							|  |  |  |         # now, remove <label id="..."/> from parent: | 
					
						
							|  |  |  |         parent.removeChild(label) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |         if parent.tagName == "title": | 
					
						
							|  |  |  |             parent.normalize() | 
					
						
							|  |  |  |             children = parent.childNodes | 
					
						
							|  |  |  |             if children[-1].nodeType == TEXT: | 
					
						
							|  |  |  |                 children[-1].data = string.rstrip(children[-1].data) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  | def fixup_trailing_whitespace(doc, wsmap): | 
					
						
							|  |  |  |     queue = [doc] | 
					
						
							|  |  |  |     while queue: | 
					
						
							|  |  |  |         node = queue[0] | 
					
						
							|  |  |  |         del queue[0] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if node.nodeType == ELEMENT \ | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |            and wsmap.has_key(node.tagName): | 
					
						
							|  |  |  |             ws = wsmap[node.tagName] | 
					
						
							|  |  |  |             children = node.childNodes | 
					
						
							|  |  |  |             children.reverse() | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |             if children[0].nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |                 data = string.rstrip(children[0].data) + ws | 
					
						
							|  |  |  |                 children[0].data = data | 
					
						
							|  |  |  |             children.reverse() | 
					
						
							|  |  |  |             # hack to get the title in place: | 
					
						
							|  |  |  |             if node.tagName == "title" \ | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |                and node.parentNode.firstChild.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |                 node.parentNode.insertBefore(doc.createText("\n  "), | 
					
						
							|  |  |  |                                              node.parentNode.firstChild) | 
					
						
							|  |  |  |         for child in node.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |             if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |                 queue.append(child) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def normalize(doc): | 
					
						
							|  |  |  |     for node in doc.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if node.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |             node.normalize() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def cleanup_trailing_parens(doc, element_names): | 
					
						
							|  |  |  |     d = {} | 
					
						
							|  |  |  |     for gi in element_names: | 
					
						
							|  |  |  |         d[gi] = gi | 
					
						
							|  |  |  |     rewrite_element = d.has_key | 
					
						
							|  |  |  |     queue = [] | 
					
						
							|  |  |  |     for node in doc.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if node.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |             queue.append(node) | 
					
						
							|  |  |  |     while queue: | 
					
						
							|  |  |  |         node = queue[0] | 
					
						
							|  |  |  |         del queue[0] | 
					
						
							|  |  |  |         if rewrite_element(node.tagName): | 
					
						
							|  |  |  |             children = node.childNodes | 
					
						
							|  |  |  |             if len(children) == 1 \ | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |                and children[0].nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |                 data = children[0].data | 
					
						
							|  |  |  |                 if data[-2:] == "()": | 
					
						
							|  |  |  |                     children[0].data = data[:-2] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             for child in node.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |                 if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |                     queue.append(child) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  | def contents_match(left, right): | 
					
						
							|  |  |  |     left_children = left.childNodes | 
					
						
							|  |  |  |     right_children = right.childNodes | 
					
						
							|  |  |  |     if len(left_children) != len(right_children): | 
					
						
							|  |  |  |         return 0 | 
					
						
							|  |  |  |     for l, r in map(None, left_children, right_children): | 
					
						
							|  |  |  |         nodeType = l.nodeType | 
					
						
							|  |  |  |         if nodeType != r.nodeType: | 
					
						
							|  |  |  |             return 0 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |             if l.tagName != r.tagName: | 
					
						
							|  |  |  |                 return 0 | 
					
						
							|  |  |  |             # should check attributes, but that's not a problem here | 
					
						
							|  |  |  |             if not contents_match(l, r): | 
					
						
							|  |  |  |                 return 0 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         elif nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |             if l.data != r.data: | 
					
						
							|  |  |  |                 return 0 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # not quite right, but good enough | 
					
						
							|  |  |  |             return 0 | 
					
						
							|  |  |  |     return 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_module_info(doc, section): | 
					
						
							|  |  |  |     # Heavy. | 
					
						
							|  |  |  |     node = extract_first_element(section, "modulesynopsis") | 
					
						
							|  |  |  |     if node is None: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |     node._node.name = "synopsis" | 
					
						
							|  |  |  |     lastchild = node.childNodes[-1] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     if lastchild.nodeType == TEXT \ | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |        and lastchild.data[-1:] == ".": | 
					
						
							|  |  |  |         lastchild.data = lastchild.data[:-1] | 
					
						
							| 
									
										
										
										
											1999-01-19 23:09:31 +00:00
										 |  |  |     modauthor = extract_first_element(section, "moduleauthor") | 
					
						
							|  |  |  |     if modauthor: | 
					
						
							|  |  |  |         modauthor._node.name = "author" | 
					
						
							|  |  |  |         modauthor.appendChild(doc.createTextNode( | 
					
						
							|  |  |  |             modauthor.getAttribute("name"))) | 
					
						
							|  |  |  |         modauthor.removeAttribute("name") | 
					
						
							| 
									
										
										
										
											1999-03-11 17:35:12 +00:00
										 |  |  |     platform = extract_first_element(section, "platform") | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |     if section.tagName == "section": | 
					
						
							|  |  |  |         modinfo_pos = 2 | 
					
						
							|  |  |  |         modinfo = doc.createElement("moduleinfo") | 
					
						
							|  |  |  |         moddecl = extract_first_element(section, "declaremodule") | 
					
						
							|  |  |  |         name = None | 
					
						
							|  |  |  |         if moddecl: | 
					
						
							|  |  |  |             modinfo.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |             name = moddecl.attributes["name"].value | 
					
						
							|  |  |  |             namenode = doc.createElement("name") | 
					
						
							|  |  |  |             namenode.appendChild(doc.createTextNode(name)) | 
					
						
							|  |  |  |             modinfo.appendChild(namenode) | 
					
						
							|  |  |  |             type = moddecl.attributes.get("type") | 
					
						
							|  |  |  |             if type: | 
					
						
							|  |  |  |                 type = type.value | 
					
						
							|  |  |  |                 modinfo.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |                 typenode = doc.createElement("type") | 
					
						
							|  |  |  |                 typenode.appendChild(doc.createTextNode(type)) | 
					
						
							|  |  |  |                 modinfo.appendChild(typenode) | 
					
						
							| 
									
										
										
										
											1999-01-29 22:12:29 +00:00
										 |  |  |         versionadded = extract_first_element(section, "versionadded") | 
					
						
							|  |  |  |         if versionadded: | 
					
						
							|  |  |  |             modinfo.setAttribute("added", versionadded.getAttribute("version")) | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |         title = get_first_element(section, "title") | 
					
						
							|  |  |  |         if title: | 
					
						
							|  |  |  |             children = title.childNodes | 
					
						
							|  |  |  |             if len(children) >= 2 \ | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |                and children[0].nodeType == ELEMENT \ | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |                and children[0].tagName == "module" \ | 
					
						
							|  |  |  |                and children[0].childNodes[0].data == name: | 
					
						
							|  |  |  |                 # this is it; morph the <title> into <short-synopsis> | 
					
						
							|  |  |  |                 first_data = children[1] | 
					
						
							|  |  |  |                 if first_data.data[:4] == " ---": | 
					
						
							|  |  |  |                     first_data.data = string.lstrip(first_data.data[4:]) | 
					
						
							|  |  |  |                 title._node.name = "short-synopsis" | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |                 if children[-1].nodeType == TEXT \ | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |                    and children[-1].data[-1:] == ".": | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |                     children[-1].data = children[-1].data[:-1] | 
					
						
							|  |  |  |                 section.removeChild(title) | 
					
						
							|  |  |  |                 section.removeChild(section.childNodes[0]) | 
					
						
							|  |  |  |                 title.removeChild(children[0]) | 
					
						
							|  |  |  |                 modinfo_pos = 0 | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |                 ewrite("module name in title doesn't match" | 
					
						
							|  |  |  |                        " <declaremodule/>; no <short-synopsis/>\n") | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |             ewrite("Unexpected condition: <section/> without <title/>\n") | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |         modinfo.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |         modinfo.appendChild(node) | 
					
						
							|  |  |  |         if title and not contents_match(title, node): | 
					
						
							|  |  |  |             # The short synopsis is actually different, | 
					
						
							|  |  |  |             # and needs to be stored: | 
					
						
							|  |  |  |             modinfo.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |             modinfo.appendChild(title) | 
					
						
							| 
									
										
										
										
											1999-01-19 23:09:31 +00:00
										 |  |  |         if modauthor: | 
					
						
							|  |  |  |             modinfo.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |             modinfo.appendChild(modauthor) | 
					
						
							| 
									
										
										
										
											1999-03-11 17:35:12 +00:00
										 |  |  |         if platform: | 
					
						
							|  |  |  |             modinfo.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |             modinfo.appendChild(platform) | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  |         modinfo.appendChild(doc.createTextNode("\n  ")) | 
					
						
							|  |  |  |         section.insertBefore(modinfo, section.childNodes[modinfo_pos]) | 
					
						
							|  |  |  |         section.insertBefore(doc.createTextNode("\n  "), modinfo) | 
					
						
							| 
									
										
										
										
											1999-03-11 17:35:12 +00:00
										 |  |  |         # | 
					
						
							|  |  |  |         # The rest of this removes extra newlines from where we cut out | 
					
						
							|  |  |  |         # a lot of elements.  A lot of code for minimal value, but keeps | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |         # keeps the generated *ML from being too funny looking. | 
					
						
							| 
									
										
										
										
											1999-03-11 17:35:12 +00:00
										 |  |  |         # | 
					
						
							|  |  |  |         section.normalize() | 
					
						
							|  |  |  |         children = section.childNodes | 
					
						
							|  |  |  |         for i in range(len(children)): | 
					
						
							|  |  |  |             node = children[i] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |             if node.nodeType == ELEMENT \ | 
					
						
							| 
									
										
										
										
											1999-03-11 17:35:12 +00:00
										 |  |  |                and node.tagName == "moduleinfo": | 
					
						
							|  |  |  |                 nextnode = children[i+1] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |                 if nextnode.nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-03-11 17:35:12 +00:00
										 |  |  |                     data = nextnode.data | 
					
						
							|  |  |  |                     if len(string.lstrip(data)) < (len(data) - 4): | 
					
						
							|  |  |  |                         nextnode.data = "\n\n\n" + string.lstrip(data) | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  | def cleanup_synopses(doc, fragment): | 
					
						
							|  |  |  |     for node in find_all_elements(fragment, "section"): | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         create_module_info(doc, node) | 
					
						
							| 
									
										
										
										
											1998-12-10 20:25:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def fixup_table_structures(doc, fragment): | 
					
						
							|  |  |  |     for table in find_all_elements(fragment, "table"): | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         fixup_table(doc, table) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-14 19:45:38 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def fixup_table(doc, table): | 
					
						
							|  |  |  |     # create the table head | 
					
						
							|  |  |  |     thead = doc.createElement("thead") | 
					
						
							|  |  |  |     row = doc.createElement("row") | 
					
						
							|  |  |  |     move_elements_by_name(doc, table, row, "entry") | 
					
						
							|  |  |  |     thead.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |     thead.appendChild(row) | 
					
						
							|  |  |  |     thead.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |     # create the table body | 
					
						
							|  |  |  |     tbody = doc.createElement("tbody") | 
					
						
							|  |  |  |     prev_row = None | 
					
						
							|  |  |  |     last_was_hline = 0 | 
					
						
							|  |  |  |     children = table.childNodes | 
					
						
							|  |  |  |     for child in children: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-14 19:45:38 +00:00
										 |  |  |             tagName = child.tagName | 
					
						
							|  |  |  |             if tagName == "hline" and prev_row is not None: | 
					
						
							|  |  |  |                 prev_row.setAttribute("rowsep", "1") | 
					
						
							|  |  |  |             elif tagName == "row": | 
					
						
							|  |  |  |                 prev_row = child | 
					
						
							|  |  |  |     # save the rows: | 
					
						
							|  |  |  |     tbody.appendChild(doc.createTextNode("\n    ")) | 
					
						
							|  |  |  |     move_elements_by_name(doc, table, tbody, "row", sep="\n    ") | 
					
						
							|  |  |  |     # and toss the rest: | 
					
						
							|  |  |  |     while children: | 
					
						
							|  |  |  |         child = children[0] | 
					
						
							|  |  |  |         nodeType = child.nodeType | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-01-14 19:45:38 +00:00
										 |  |  |             if string.strip(child.data): | 
					
						
							|  |  |  |                 raise ConversionError("unexpected free data in table") | 
					
						
							|  |  |  |             table.removeChild(child) | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-14 19:45:38 +00:00
										 |  |  |             if child.tagName != "hline": | 
					
						
							|  |  |  |                 raise ConversionError( | 
					
						
							|  |  |  |                     "unexpected <%s> in table" % child.tagName) | 
					
						
							|  |  |  |             table.removeChild(child) | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         raise ConversionError( | 
					
						
							|  |  |  |             "unexpected %s node in table" % child.__class__.__name__) | 
					
						
							|  |  |  |     # nothing left in the <table>; add the <thead> and <tbody> | 
					
						
							|  |  |  |     tgroup = doc.createElement("tgroup") | 
					
						
							|  |  |  |     tgroup.appendChild(doc.createTextNode("\n  ")) | 
					
						
							|  |  |  |     tgroup.appendChild(thead) | 
					
						
							|  |  |  |     tgroup.appendChild(doc.createTextNode("\n  ")) | 
					
						
							|  |  |  |     tgroup.appendChild(tbody) | 
					
						
							|  |  |  |     tgroup.appendChild(doc.createTextNode("\n  ")) | 
					
						
							|  |  |  |     table.appendChild(tgroup) | 
					
						
							|  |  |  |     # now make the <entry>s look nice: | 
					
						
							|  |  |  |     for row in table.getElementsByTagName("row"): | 
					
						
							|  |  |  |         fixup_row(doc, row) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fixup_row(doc, row): | 
					
						
							|  |  |  |     entries = [] | 
					
						
							|  |  |  |     map(entries.append, row.childNodes[1:]) | 
					
						
							|  |  |  |     for entry in entries: | 
					
						
							|  |  |  |         row.insertBefore(doc.createTextNode("\n         "), entry) | 
					
						
							|  |  |  | #    row.appendChild(doc.createTextNode("\n      ")) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def move_elements_by_name(doc, source, dest, name, sep=None): | 
					
						
							|  |  |  |     nodes = [] | 
					
						
							|  |  |  |     for child in source.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == ELEMENT and child.tagName == name: | 
					
						
							| 
									
										
										
										
											1999-01-14 19:45:38 +00:00
										 |  |  |             nodes.append(child) | 
					
						
							|  |  |  |     for node in nodes: | 
					
						
							|  |  |  |         source.removeChild(node) | 
					
						
							|  |  |  |         dest.appendChild(node) | 
					
						
							|  |  |  |         if sep: | 
					
						
							|  |  |  |             dest.appendChild(doc.createTextNode(sep)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | RECURSE_INTO_PARA_CONTAINERS = ( | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     "chapter", "abstract", "enumerate", | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     "section", "subsection", "subsubsection", | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     "paragraph", "subparagraph", "back-matter", | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     "howto", "manual", | 
					
						
							| 
									
										
										
										
											1999-08-03 15:32:48 +00:00
										 |  |  |     "item", "itemize", "fulllineitems", "enumeration", "descriptionlist", | 
					
						
							|  |  |  |     "definitionlist", "definition", | 
					
						
							| 
									
										
										
										
											1999-01-19 23:09:31 +00:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | PARA_LEVEL_ELEMENTS = ( | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     "moduleinfo", "title", "verbatim", "enumerate", "item", | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     "interpreter-session", "back-matter", "interactive-session", | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     "opcodedesc", "classdesc", "datadesc", | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni", | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     "funcdescni", "methoddescni", "excdescni", | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     "tableii", "tableiii", "tableiv", "localmoduletable", | 
					
						
							| 
									
										
										
										
											1999-08-03 15:32:48 +00:00
										 |  |  |     "sectionauthor", "seealso", "itemize", | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     # include <para>, so we can just do it again to get subsequent paras: | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     PARA_ELEMENT, | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PARA_LEVEL_PRECEEDERS = ( | 
					
						
							| 
									
										
										
										
											1999-08-03 15:32:48 +00:00
										 |  |  |     "setindexsubitem", | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     "stindex", "obindex", "COMMENT", "label", "input", "title", | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     "versionadded", "versionchanged", "declaremodule", "modulesynopsis", | 
					
						
							| 
									
										
										
										
											1999-08-03 15:32:48 +00:00
										 |  |  |     "moduleauthor", "indexterm", "leader", | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def fixup_paras(doc, fragment): | 
					
						
							|  |  |  |     for child in fragment.childNodes: | 
					
						
							|  |  |  |         if child.nodeType == ELEMENT \ | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |            and child.tagName in RECURSE_INTO_PARA_CONTAINERS: | 
					
						
							|  |  |  |             # | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |             fixup_paras_helper(doc, child) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     descriptions = find_all_elements(fragment, "description") | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     for description in descriptions: | 
					
						
							|  |  |  |         fixup_paras_helper(doc, description) | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | def fixup_paras_helper(doc, container, depth=0): | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     # document is already normalized | 
					
						
							|  |  |  |     children = container.childNodes | 
					
						
							|  |  |  |     start = 0 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     while len(children) > start: | 
					
						
							|  |  |  |         start = skip_leading_nodes(children, start) | 
					
						
							|  |  |  |         if start >= len(children): | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Either paragraph material or something to recurse into: | 
					
						
							|  |  |  |         # | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if (children[start].nodeType == ELEMENT) \ | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |            and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS): | 
					
						
							|  |  |  |             fixup_paras_helper(doc, children[start]) | 
					
						
							|  |  |  |             start = skip_leading_nodes(children, start + 1) | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # paragraph material: | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         build_para(doc, container, start, len(children)) | 
					
						
							|  |  |  |         if DEBUG_PARA_FIXER and depth == 10: | 
					
						
							|  |  |  |             sys.exit(1) | 
					
						
							|  |  |  |         start = start + 1 | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def build_para(doc, parent, start, i): | 
					
						
							|  |  |  |     children = parent.childNodes | 
					
						
							|  |  |  |     after = start + 1 | 
					
						
							|  |  |  |     have_last = 0 | 
					
						
							| 
									
										
										
										
											1999-01-29 20:55:07 +00:00
										 |  |  |     BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     # Collect all children until \n\n+ is found in a text node or a | 
					
						
							|  |  |  |     # member of BREAK_ELEMENTS is found. | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     for j in range(start, i): | 
					
						
							|  |  |  |         after = j + 1 | 
					
						
							|  |  |  |         child = children[j] | 
					
						
							|  |  |  |         nodeType = child.nodeType | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |             if child.tagName in BREAK_ELEMENTS: | 
					
						
							|  |  |  |                 after = j | 
					
						
							|  |  |  |                 break | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         elif nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |             pos = string.find(child.data, "\n\n") | 
					
						
							|  |  |  |             if pos == 0: | 
					
						
							|  |  |  |                 after = j | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             if pos >= 1: | 
					
						
							|  |  |  |                 child.splitText(pos) | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         have_last = 1 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     if (start + 1) > after: | 
					
						
							|  |  |  |         raise ConversionError( | 
					
						
							|  |  |  |             "build_para() could not identify content to turn into a paragraph") | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     if children[after - 1].nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |         # we may need to split off trailing white space: | 
					
						
							|  |  |  |         child = children[after - 1] | 
					
						
							|  |  |  |         data = child.data | 
					
						
							|  |  |  |         if string.rstrip(data) != data: | 
					
						
							|  |  |  |             have_last = 0 | 
					
						
							|  |  |  |             child.splitText(len(string.rstrip(data))) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     para = doc.createElement(PARA_ELEMENT) | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     prev = None | 
					
						
							|  |  |  |     indexes = range(start, after) | 
					
						
							|  |  |  |     indexes.reverse() | 
					
						
							|  |  |  |     for j in indexes: | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         node = parent.childNodes[j] | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |         parent.removeChild(node) | 
					
						
							|  |  |  |         para.insertBefore(node, prev) | 
					
						
							|  |  |  |         prev = node | 
					
						
							|  |  |  |     if have_last: | 
					
						
							|  |  |  |         parent.appendChild(para) | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |         parent.appendChild(doc.createTextNode("\n\n")) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         return len(parent.childNodes) | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |         nextnode = parent.childNodes[start] | 
					
						
							|  |  |  |         if nextnode.nodeType == TEXT: | 
					
						
							|  |  |  |             if nextnode.data and nextnode.data[0] != "\n": | 
					
						
							|  |  |  |                 nextnode.data = "\n" + nextnode.data | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             newnode = doc.createTextNode("\n") | 
					
						
							|  |  |  |             parent.insertBefore(newnode, nextnode) | 
					
						
							|  |  |  |             nextnode = newnode | 
					
						
							|  |  |  |             start = start + 1 | 
					
						
							|  |  |  |         parent.insertBefore(para, nextnode) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         return start + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  | def skip_leading_nodes(children, start): | 
					
						
							|  |  |  |     """Return index into children of a node at which paragraph building should
 | 
					
						
							|  |  |  |     begin or a recursive call to fixup_paras_helper() should be made (for | 
					
						
							|  |  |  |     subsections, etc.). | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |     When the return value >= len(children), we've built all the paras we can | 
					
						
							|  |  |  |     from this list of children. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     i = len(children) | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |     while i > start: | 
					
						
							|  |  |  |         # skip over leading comments and whitespace: | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         child = children[start] | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |         nodeType = child.nodeType | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |             data = child.data | 
					
						
							|  |  |  |             shortened = string.lstrip(data) | 
					
						
							|  |  |  |             if shortened: | 
					
						
							|  |  |  |                 if data != shortened: | 
					
						
							|  |  |  |                     # break into two nodes: whitespace and non-whitespace | 
					
						
							|  |  |  |                     child.splitText(len(data) - len(shortened)) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |                     return start + 1 | 
					
						
							|  |  |  |                 return start | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  |             # all whitespace, just skip | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         elif nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |             tagName = child.tagName | 
					
						
							|  |  |  |             if tagName in RECURSE_INTO_PARA_CONTAINERS: | 
					
						
							|  |  |  |                 return start | 
					
						
							|  |  |  |             if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS: | 
					
						
							|  |  |  |                 return start | 
					
						
							|  |  |  |         start = start + 1 | 
					
						
							|  |  |  |     return start | 
					
						
							| 
									
										
										
										
											1998-12-10 05:07:09 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def fixup_rfc_references(doc, fragment): | 
					
						
							|  |  |  |     for rfcnode in find_all_elements(fragment, "rfc"): | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         rfcnode.appendChild(doc.createTextNode( | 
					
						
							|  |  |  |             "RFC " + rfcnode.getAttribute("num"))) | 
					
						
							| 
									
										
										
										
											1999-01-14 21:18:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def fixup_signatures(doc, fragment): | 
					
						
							|  |  |  |     for child in fragment.childNodes: | 
					
						
							|  |  |  |         if child.nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-01-14 21:18:03 +00:00
										 |  |  |             args = child.getElementsByTagName("args") | 
					
						
							|  |  |  |             for arg in args: | 
					
						
							|  |  |  |                 fixup_args(doc, arg) | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |                 arg.normalize() | 
					
						
							| 
									
										
										
										
											1999-01-14 21:18:03 +00:00
										 |  |  |             args = child.getElementsByTagName("constructor-args") | 
					
						
							|  |  |  |             for arg in args: | 
					
						
							|  |  |  |                 fixup_args(doc, arg) | 
					
						
							|  |  |  |                 arg.normalize() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fixup_args(doc, arglist): | 
					
						
							|  |  |  |     for child in arglist.childNodes: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == ELEMENT \ | 
					
						
							| 
									
										
										
										
											1999-01-14 21:18:03 +00:00
										 |  |  |            and child.tagName == "optional": | 
					
						
							|  |  |  |             # found it; fix and return | 
					
						
							|  |  |  |             arglist.insertBefore(doc.createTextNode("["), child) | 
					
						
							|  |  |  |             optkids = child.childNodes | 
					
						
							|  |  |  |             while optkids: | 
					
						
							|  |  |  |                 k = optkids[0] | 
					
						
							|  |  |  |                 child.removeChild(k) | 
					
						
							|  |  |  |                 arglist.insertBefore(k, child) | 
					
						
							|  |  |  |             arglist.insertBefore(doc.createTextNode("]"), child) | 
					
						
							|  |  |  |             arglist.removeChild(child) | 
					
						
							|  |  |  |             return fixup_args(doc, arglist) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  | def fixup_sectionauthors(doc, fragment): | 
					
						
							|  |  |  |     for sectauth in find_all_elements(fragment, "sectionauthor"): | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |         section = sectauth.parentNode | 
					
						
							|  |  |  |         section.removeChild(sectauth) | 
					
						
							|  |  |  |         sectauth._node.name = "author" | 
					
						
							|  |  |  |         sectauth.appendChild(doc.createTextNode( | 
					
						
							|  |  |  |             sectauth.getAttribute("name"))) | 
					
						
							|  |  |  |         sectauth.removeAttribute("name") | 
					
						
							|  |  |  |         after = section.childNodes[2] | 
					
						
							|  |  |  |         title = section.childNodes[1] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if title.nodeType == ELEMENT and title.tagName != "title": | 
					
						
							| 
									
										
										
										
											1999-01-28 23:59:58 +00:00
										 |  |  |             after = section.childNodes[0] | 
					
						
							|  |  |  |         section.insertBefore(doc.createTextNode("\n  "), after) | 
					
						
							|  |  |  |         section.insertBefore(sectauth, after) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-02-18 16:32:21 +00:00
										 |  |  | def fixup_verbatims(doc): | 
					
						
							|  |  |  |     for verbatim in find_all_elements(doc, "verbatim"): | 
					
						
							|  |  |  |         child = verbatim.childNodes[0] | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if child.nodeType == TEXT \ | 
					
						
							| 
									
										
										
										
											1999-02-18 16:32:21 +00:00
										 |  |  |            and string.lstrip(child.data)[:3] == ">>>": | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |             verbatim._node.name = "interactive-session" | 
					
						
							| 
									
										
										
										
											1999-02-18 16:32:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  | def add_node_ids(fragment, counter=0): | 
					
						
							|  |  |  |     fragment._node.node_id = counter | 
					
						
							|  |  |  |     for node in fragment.childNodes: | 
					
						
							|  |  |  |         counter = counter + 1 | 
					
						
							|  |  |  |         if node.nodeType == ELEMENT: | 
					
						
							|  |  |  |             counter = add_node_ids(node, counter) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             node._node.node_id = counter | 
					
						
							|  |  |  |     return counter + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex', | 
					
						
							|  |  |  |                         'refexmodindex', 'refstmodindex') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fixup_refmodindexes(fragment): | 
					
						
							|  |  |  |     # Locate <ref*modindex>...</> co-located with <module>...</>, and | 
					
						
							|  |  |  |     # remove the <ref*modindex>, replacing it with index=index on the | 
					
						
							|  |  |  |     # <module> element. | 
					
						
							|  |  |  |     nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS) | 
					
						
							|  |  |  |     d = {} | 
					
						
							|  |  |  |     for node in nodes: | 
					
						
							|  |  |  |         parent = node.parentNode | 
					
						
							|  |  |  |         d[parent._node.node_id] = parent | 
					
						
							|  |  |  |     del nodes | 
					
						
							|  |  |  |     map(fixup_refmodindexes_chunk, d.values()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fixup_refmodindexes_chunk(container): | 
					
						
							|  |  |  |     # node is probably a <para>; let's see how often it isn't: | 
					
						
							|  |  |  |     if container.tagName != PARA_ELEMENT: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |         bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     module_entries = find_all_elements(container, "module") | 
					
						
							|  |  |  |     if not module_entries: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |     index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS) | 
					
						
							|  |  |  |     removes = [] | 
					
						
							|  |  |  |     for entry in index_entries: | 
					
						
							|  |  |  |         children = entry.childNodes | 
					
						
							|  |  |  |         if len(children) != 0: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |             bwrite("--- unexpected number of children for %s node:\n" | 
					
						
							|  |  |  |                    % entry.tagName) | 
					
						
							|  |  |  |             ewrite(entry.toxml() + "\n") | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         found = 0 | 
					
						
							| 
									
										
										
										
											1999-08-03 15:32:48 +00:00
										 |  |  |         module_name = entry.getAttribute("module") | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |         for node in module_entries: | 
					
						
							|  |  |  |             if len(node.childNodes) != 1: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             this_name = node.childNodes[0].data | 
					
						
							|  |  |  |             if this_name == module_name: | 
					
						
							|  |  |  |                 found = 1 | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |                 node.setAttribute("index", "yes") | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |         if found: | 
					
						
							|  |  |  |             removes.append(entry) | 
					
						
							|  |  |  |     for node in removes: | 
					
						
							|  |  |  |         container.removeChild(node) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fixup_bifuncindexes(fragment): | 
					
						
							|  |  |  |     nodes = find_all_elements(fragment, 'bifuncindex') | 
					
						
							|  |  |  |     d = {} | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |     # make sure that each parent is only processed once: | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     for node in nodes: | 
					
						
							|  |  |  |         parent = node.parentNode | 
					
						
							|  |  |  |         d[parent._node.node_id] = parent | 
					
						
							|  |  |  |     del nodes | 
					
						
							|  |  |  |     map(fixup_bifuncindexes_chunk, d.values()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fixup_bifuncindexes_chunk(container): | 
					
						
							|  |  |  |     removes = [] | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |     entries = find_all_child_elements(container, "bifuncindex") | 
					
						
							|  |  |  |     function_entries = find_all_child_elements(container, "function") | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     for entry in entries: | 
					
						
							|  |  |  |         function_name = entry.getAttribute("name") | 
					
						
							|  |  |  |         found = 0 | 
					
						
							|  |  |  |         for func_entry in function_entries: | 
					
						
							|  |  |  |             t2 = func_entry.childNodes[0].data | 
					
						
							|  |  |  |             if t2[-2:] != "()": | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             t2 = t2[:-2] | 
					
						
							|  |  |  |             if t2 == function_name: | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |                 func_entry.setAttribute("index", "yes") | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |                 func_entry.setAttribute("module", "__builtin__") | 
					
						
							|  |  |  |                 if not found: | 
					
						
							|  |  |  |                     found = 1 | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |                     removes.append(entry) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     for entry in removes: | 
					
						
							|  |  |  |         container.removeChild(entry) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  | _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") | 
					
						
							| 
									
										
										
										
											1999-01-06 22:50:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  | def write_esis(doc, ofp, knownempty): | 
					
						
							|  |  |  |     for node in doc.childNodes: | 
					
						
							|  |  |  |         nodeType = node.nodeType | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         if nodeType == ELEMENT: | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |             gi = node.tagName | 
					
						
							|  |  |  |             if knownempty(gi): | 
					
						
							|  |  |  |                 if node.hasChildNodes(): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |                     raise ValueError, \ | 
					
						
							|  |  |  |                           "declared-empty node <%s> has children" % gi | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |                 ofp.write("e\n") | 
					
						
							|  |  |  |             for k, v in node.attributes.items(): | 
					
						
							|  |  |  |                 value = v.value | 
					
						
							|  |  |  |                 if _token_rx.match(value): | 
					
						
							|  |  |  |                     dtype = "TOKEN" | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     dtype = "CDATA" | 
					
						
							|  |  |  |                 ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value))) | 
					
						
							|  |  |  |             ofp.write("(%s\n" % gi) | 
					
						
							|  |  |  |             write_esis(node, ofp, knownempty) | 
					
						
							|  |  |  |             ofp.write(")%s\n" % gi) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         elif nodeType == TEXT: | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |             ofp.write("-%s\n" % esistools.encode(node.data)) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             raise RuntimeError, "unsupported node type: %s" % nodeType | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  | def convert(ifp, ofp): | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |     p = esistools.ExtendedEsisBuilder() | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     p.feed(ifp.read()) | 
					
						
							|  |  |  |     doc = p.document | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     fragment = p.fragment | 
					
						
							|  |  |  |     normalize(fragment) | 
					
						
							|  |  |  |     simplify(doc, fragment) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     handle_labels(doc, fragment) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     handle_appendix(doc, fragment) | 
					
						
							| 
									
										
										
										
											1998-11-23 23:10:35 +00:00
										 |  |  |     fixup_trailing_whitespace(doc, { | 
					
						
							|  |  |  |         "abstract": "\n", | 
					
						
							|  |  |  |         "title": "", | 
					
						
							|  |  |  |         "chapter": "\n\n", | 
					
						
							|  |  |  |         "section": "\n\n", | 
					
						
							|  |  |  |         "subsection": "\n\n", | 
					
						
							|  |  |  |         "subsubsection": "\n\n", | 
					
						
							|  |  |  |         "paragraph": "\n\n", | 
					
						
							|  |  |  |         "subparagraph": "\n\n", | 
					
						
							|  |  |  |         }) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     cleanup_root_text(doc) | 
					
						
							| 
									
										
										
										
											1999-08-02 14:46:15 +00:00
										 |  |  |     cleanup_trailing_parens(fragment, ["function", "method", "cfunction"]) | 
					
						
							|  |  |  |     cleanup_synopses(doc, fragment) | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |     fixup_descriptors(doc, fragment) | 
					
						
							|  |  |  |     fixup_verbatims(fragment) | 
					
						
							|  |  |  |     normalize(fragment) | 
					
						
							|  |  |  |     fixup_paras(doc, fragment) | 
					
						
							|  |  |  |     fixup_sectionauthors(doc, fragment) | 
					
						
							|  |  |  |     fixup_table_structures(doc, fragment) | 
					
						
							|  |  |  |     fixup_rfc_references(doc, fragment) | 
					
						
							|  |  |  |     fixup_signatures(doc, fragment) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:23:19 +00:00
										 |  |  |     add_node_ids(fragment) | 
					
						
							|  |  |  |     fixup_refmodindexes(fragment) | 
					
						
							|  |  |  |     fixup_bifuncindexes(fragment) | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |     # | 
					
						
							|  |  |  |     d = {} | 
					
						
							|  |  |  |     for gi in p.get_empties(): | 
					
						
							|  |  |  |         d[gi] = gi | 
					
						
							| 
									
										
										
										
											1999-01-14 21:18:03 +00:00
										 |  |  |     if d.has_key("rfc"): | 
					
						
							|  |  |  |         del d["rfc"] | 
					
						
							| 
									
										
										
										
											1998-12-01 19:03:01 +00:00
										 |  |  |     knownempty = d.has_key | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:52 +00:00
										 |  |  |         write_esis(fragment, ofp, knownempty) | 
					
						
							| 
									
										
										
										
											1998-11-23 17:02:03 +00:00
										 |  |  |     except IOError, (err, msg): | 
					
						
							|  |  |  |         # Ignore EPIPE; it just means that whoever we're writing to stopped | 
					
						
							|  |  |  |         # reading.  The rest of the output would be ignored.  All other errors | 
					
						
							|  |  |  |         # should still be reported, | 
					
						
							|  |  |  |         if err != errno.EPIPE: | 
					
						
							|  |  |  |             raise | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							|  |  |  |     if len(sys.argv) == 1: | 
					
						
							|  |  |  |         ifp = sys.stdin | 
					
						
							|  |  |  |         ofp = sys.stdout | 
					
						
							|  |  |  |     elif len(sys.argv) == 2: | 
					
						
							|  |  |  |         ifp = open(sys.argv[1]) | 
					
						
							|  |  |  |         ofp = sys.stdout | 
					
						
							|  |  |  |     elif len(sys.argv) == 3: | 
					
						
							|  |  |  |         ifp = open(sys.argv[1]) | 
					
						
							|  |  |  |         ofp = open(sys.argv[2], "w") | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         usage() | 
					
						
							|  |  |  |         sys.exit(2) | 
					
						
							|  |  |  |     convert(ifp, ofp) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     main() |