mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1033 lines
		
	
	
	
		
			35 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			1033 lines
		
	
	
	
		
			35 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #! /usr/bin/env python
 | |
| 
 | |
| """Perform massive transformations on a document tree created from the LaTeX
 | |
| of the Python documentation, and dump the ESIS data for the transformed tree.
 | |
| """
 | |
| 
 | |
| 
 | |
| import errno
 | |
| import esistools
 | |
| import re
 | |
| import string
 | |
| import sys
 | |
| import xml.dom
 | |
| import xml.dom.minidom
 | |
| 
 | |
# Short aliases for the DOM node-type codes this module compares against.
ELEMENT, ENTITY_REFERENCE, TEXT = (
    xml.dom.Node.ELEMENT_NODE,
    xml.dom.Node.ENTITY_REFERENCE_NODE,
    xml.dom.Node.TEXT_NODE,
)
 | |
| 
 | |
| 
 | |
class ConversionError(Exception):
    """Raised when the tree holds content the fixers cannot convert."""
 | |
| 
 | |
| 
 | |
# ewrite() writes text to stderr.  bwrite() does the same, but wraps the text
# in the terminal's "bold on"/"reset" sequences when those can be obtained;
# otherwise it is simply another name for ewrite().
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # os.name is "posix" on Unix; sys.platform never has that value (it is
    # e.g. "linux2" or "darwin"), so testing it disabled this path everywhere.
    import os
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    import commands
except ImportError:
    bwrite = ewrite
else:
    # The escape sequences are fetched once, when the function is defined.
    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
               BOLDOFF=commands.getoutput("tput sgr0")):
        """Write s to stderr between the bold-on and reset sequences."""
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
 | |
| 
 | |
| 
 | |
# Tag name given to the paragraph elements created by build_para().
PARA_ELEMENT = "para"

# Set to a true value to make para_msg() report on stderr.
DEBUG_PARA_FIXER = 0

# The implementation of para_msg() is chosen once, when the module is loaded.
if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Debugging is off: ignore the message."""
        return None
else:
    def para_msg(s):
        """Debugging is on: write the message to stderr, marked with '***'."""
        ewrite("*** %s\n" % s)
 | |
| 
 | |
| 
 | |
def get_first_element(doc, gi):
    """Return the first direct child of doc whose nodeName is gi, or None."""
    matches = [child for child in doc.childNodes if child.nodeName == gi]
    if matches:
        return matches[0]
    return None
 | |
| 
 | |
def extract_first_element(doc, gi):
    """Detach and return the first direct child of doc named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
 | |
| 
 | |
| 
 | |
def get_documentElement(node):
    """Return the last direct child of node that is an element, or None.

    Note that when node has several element children it is the final one
    that is returned, not the first.
    """
    elements = [child for child in node.childNodes
                if child.nodeType == ELEMENT]
    if elements:
        return elements[-1]
    return None
 | |
| 
 | |
| 
 | |
def set_tagName(elem, gi):
    """Rename elem in place by setting both its tagName and its nodeName."""
    elem.tagName = gi
    elem.nodeName = gi
 | |
| 
 | |
| 
 | |
def find_all_elements(doc, gi):
    """Return a list of the nodes named gi at or below doc.

    doc itself is listed first when its nodeName is gi; after that, each
    element child contributes itself (if it matches) followed by whatever
    its getElementsByTagName(gi) returns.
    """
    found = []
    if doc.nodeName == gi:
        found.append(doc)
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName == gi:
            found.append(kid)
        found.extend(kid.getElementsByTagName(gi))
    return found
 | |
| 
 | |
def find_all_child_elements(doc, gi):
    """Return a list of the direct children of doc whose nodeName is gi."""
    return [child for child in doc.childNodes if child.nodeName == gi]
 | |
| 
 | |
| 
 | |
def find_all_elements_from_set(doc, gi_set):
    """Return a new list of the nodes at or below doc named in gi_set."""
    collected = []
    return __find_all_elements_from_set(doc, gi_set, collected)
 | |
| 
 | |
def __find_all_elements_from_set(doc, gi_set, nodes):
    """Append to nodes every node at or below doc named in gi_set.

    The walk is in document order (a node before its descendants) and only
    descends into element children.  Returns the same nodes list.
    """
    pending = [doc]
    while pending:
        current = pending.pop()
        if current.nodeName in gi_set:
            nodes.append(current)
        kids = [kid for kid in current.childNodes if kid.nodeType == ELEMENT]
        # push in reverse so the first child is popped (visited) first
        kids.reverse()
        pending.extend(kids)
    return nodes
 | |
| 
 | |
| 
 | |
def simplify(doc, fragment):
    """Pull the top-level preamble nodes of fragment into its root element.

    Try to rationalize the document a bit, since these things are simply
    not valid SGML/XML documents as they stand, and need a little work:

    - <documentclass> is removed; its "classname" attribute becomes the new
      name of the top-level <document> element, when there is one;
    - the top-level <title> and every top-level <input> are moved to the
      start of the element returned by get_documentElement(), each
      followed by a newline;
    - text nodes at the very start of fragment are dropped.
    """
    classname = "document"
    hoisted = []
    classnode = extract_first_element(fragment, "documentclass")
    if classnode is not None:
        classname = classnode.getAttribute("classname")
    titlenode = extract_first_element(fragment, "title")
    if titlenode is not None:
        hoisted.append(titlenode)
    # update the name of the root element
    root = get_first_element(fragment, "document")
    if root is not None:
        set_tagName(root, classname)
    inputnode = extract_first_element(fragment, "input")
    while inputnode is not None:
        hoisted.append(inputnode)
        inputnode = extract_first_element(fragment, "input")
    if hoisted:
        docelem = get_documentElement(fragment)
        # each node is inserted at the front, so work backwards to keep
        # the collected order
        hoisted.reverse()
        for item in hoisted:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(item, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild is not None \
          and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
 | |
| 
 | |
| 
 | |
def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of doc.

    A text node that immediately follows a "COMMENT" node is kept.
    """
    doomed = []
    after_comment = 0
    for node in doc.childNodes:
        if node.nodeType == TEXT:
            if not after_comment:
                doomed.append(node)
            after_comment = 0
        else:
            after_comment = node.nodeName == "COMMENT"
    # remove only once the scan is done so the child list is not changed
    # while it is being iterated
    for node in doomed:
        doc.removeChild(node)
 | |
| 
 | |
| 
 | |
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
 | |
| 
 | |
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements of every <section> in fragment."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
 | |
| 
 | |
| 
 | |
def find_and_fix_descriptors(doc, container):
    """Rewrite the descriptors that are direct children of container.

    <subsection> children are searched recursively; every other kind of
    child is left alone.
    """
    for child in container.childNodes:
        if child.nodeType != ELEMENT:
            continue
        name = child.tagName
        if name == "subsection":
            find_and_fix_descriptors(doc, child)
        elif name in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, child)
 | |
| 
 | |
| 
 | |
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    Do these things:
      1. Add an "index='no'" attribute to the element if the tagName
         ends in 'ni', removing the 'ni' from the name.
      2. Create a <signature> from the name attribute
      2a.Create an <args> if it appears to be available.
      3. Create additional <signature>s from <*line{,ni}> elements,
         if found.
      4. If a <versionadded> is found, move it to an attribute on the
         descriptor.
      5. Move remaining child nodes to a <description> element.
      6. Put it back together.

    Raises RuntimeError when a descriptor other than <opcodedesc> carries
    a "var" attribute.
    """
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        set_tagName(descriptor, descname)
        index = 0
    desctype = descname[:-4] # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n    "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n    "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    # `children` is the live child list; it shrinks as nodes are moved out
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move <args> to <signature>, or remove if empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n    "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n  "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            # (keep a copy: the conversion modifies the original node)
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                # show the offending element before re-raising
                sys.stdout.write(oldchild.toxml() + "\n")
                raise
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    # never empty: the newline appended above is always there
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n  "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n  "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
 | |
| 
 | |
| 
 | |
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a <*line> element.

    The "name" attribute of methodline becomes the <name> child and is
    removed from methodline; any children of methodline are moved into an
    <args> child.
    """
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n    "))
    signature.appendChild(name)
    if methodline.childNodes:
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n    "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n  "))
    return signature
 | |
| 
 | |
| 
 | |
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    Their relative order is kept.
    """
    # slicing copies the live child list, so removals cannot disturb the loop
    for node in origin.childNodes[start:]:
        origin.removeChild(node)
        dest.appendChild(node)
 | |
| 
 | |
| 
 | |
def handle_appendix(doc, fragment):
    """Move everything after the <appendix> marker into a <back-matter>.

    The children of the document element are scanned for the first element
    that contains an <appendix> descendant.  That <appendix> is removed,
    and all later children of the document element are moved into a new
    <back-matter> element appended to it.  Nothing is changed beyond the
    marker removal when no children follow.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # explicit loops: map() must not be relied on for its side effects
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # drop leading whitespace-only text before filling <back-matter>
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
 | |
| 
 | |
| 
 | |
def handle_labels(doc, fragment):
    """Turn each <label id="..."/> into an "id" attribute and remove it.

    The attribute is set on the label's parent, or on the grandparent when
    the parent is a <title>.  Labels with an empty or missing id are left
    in place.  After a label is removed from a <title>, trailing
    whitespace is stripped from the title's final text node.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        in_title = parent.tagName == "title"
        if in_title:
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if in_title:
            parent.normalize()
            children = parent.childNodes
            # the title may be empty once the label is gone
            if children and children[-1].nodeType == TEXT:
                children[-1].data = children[-1].data.rstrip()
 | |
| 
 | |
| 
 | |
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends the elements named in wsmap.

    wsmap maps an element name to the whitespace string its content should
    end with.  The tree below doc is walked breadth-first; for each element
    named in wsmap whose last child is a text node, that text is
    right-stripped and the mapped string appended.  Elements with no
    children are left unchanged.
    """
    queue = [doc]
    while queue:
        node = queue.pop(0)
        # nodeName is used for both the test and the lookup because the
        # Document node in the queue has no tagName
        if node.nodeName in wsmap:
            ws = wsmap[node.nodeName]
            children = node.childNodes
            if children and children[-1].nodeType == TEXT:
                last = children[-1]
                last.data = last.data.rstrip() + ws
            # hack to get the title in place:
            if node.nodeName == "title" \
               and node.parentNode.firstChild.nodeType == ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n  "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
 | |
| 
 | |
| 
 | |
def normalize(doc):
    """Call normalize() on each element that is a direct child of doc."""
    elements = [child for child in doc.childNodes
                if child.nodeType == ELEMENT]
    for element in elements:
        element.normalize()
 | |
| 
 | |
| 
 | |
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Only elements named in element_names whose sole child is a text node
    are changed.  The search does not descend into a named element, so
    named elements nested inside another named element are not visited.
    """
    # a dictionary gives constant-time membership tests
    rewrite = {}
    for gi in element_names:
        rewrite[gi] = gi
    queue = [node for node in doc.childNodes if node.nodeType == ELEMENT]
    while queue:
        node = queue.pop(0)
        if node.tagName in rewrite:
            children = node.childNodes
            if len(children) == 1 \
               and children[0].nodeType == TEXT:
                data = children[0].data
                if data[-2:] == "()":
                    children[0].data = data[:-2]
        else:
            for child in node.childNodes:
                if child.nodeType == ELEMENT:
                    queue.append(child)
 | |
| 
 | |
| 
 | |
def contents_match(left, right):
    """Return 1 if left and right have equivalent children, else 0.

    Children are compared pairwise: elements must have the same tagName
    and recursively matching contents, text nodes must have equal data.
    Attributes are not compared, and any other kind of node counts as a
    mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    if len(left_children) != len(right_children):
        return 0
    # the lengths are equal here, so zip() pairs every child
    for l, r in zip(left_children, right_children):
        nodeType = l.nodeType
        if nodeType != r.nodeType:
            return 0
        if nodeType == ELEMENT:
            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif nodeType == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
 | |
| 
 | |
| 
 | |
def create_module_info(doc, section):
    """Collect a section's module metadata into a <moduleinfo> element.

    Does nothing when section has no <modulesynopsis> child.  Otherwise
    the synopsis is renamed <synopsis> (a final "." is dropped), and
    <moduleauthor> (renamed <author>, its "name" attribute turned into
    text) and <platform> are removed from the section.  When section
    really is a <section>, a <moduleinfo> is built from <declaremodule>,
    <versionadded>, the synopsis, the title (when it differs from the
    synopsis), the author and the platform, and inserted into the section.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    set_tagName(node, "synopsis")
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        # index in section.childNodes before which <moduleinfo> is inserted;
        # reset to 0 below when the title is removed from the section
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n    "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n    "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                # also drop whatever node is now first in the section
                section.removeChild(section.childNodes[0])
                # remove the leading <module> from the short synopsis
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n    "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n    "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n    "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n    "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n  "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n  "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but it
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            if node.nodeName == "moduleinfo":
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    # more than four leading whitespace characters:
                    # replace them with exactly three newlines
                    if len(string.lstrip(data)) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + string.lstrip(data)
 | |
| 
 | |
| 
 | |
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in fragment."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
 | |
| 
 | |
| 
 | |
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in fragment."""
    tables = find_all_elements(fragment, "table")
    for table in tables:
        fixup_table(doc, table)
 | |
| 
 | |
| 
 | |
def fixup_table(doc, table):
    """Rebuild table as <tgroup> holding a <thead> and a <tbody>.

    The <entry> children of table become the single heading row and the
    <row> children become the body.  A <row> directly followed (ignoring
    non-element nodes) by one or more <hline>s gets rowsep="1".

    Raises ConversionError if table directly contains anything besides
    <entry>, <row>, <hline> and whitespace-only text.
    """
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n    "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n    "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n    "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n    ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if child.data.strip():
                raise ConversionError("unexpected free data in <%s>: %r"
                                      % (table.tagName, child.data))
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n  "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n  "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n  "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
 | |
| 
 | |
| 
 | |
def fixup_row(doc, row):
    """Put a newline-plus-indent text node before each child after the first.

    The first child of row is left where it is.
    """
    # copy the slice: inserting while walking the live child list would
    # shift the remaining entries
    entries = list(row.childNodes[1:])
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n         "), entry)
 | |
| 
 | |
| 
 | |
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct children of source named name to the end of dest.

    When sep is a non-empty string, a text node holding it is appended to
    dest after each moved node.
    """
    matches = [child for child in source.childNodes
               if child.nodeName == name]
    for match in matches:
        source.removeChild(match)
        dest.appendChild(match)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
 | |
| 
 | |
| 
 | |
# Elements whose content the paragraph fixer descends into instead of
# treating them as paragraph material.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "back-matter",
    "howto",
    "manual",
    "item",
    "itemize",
    "fulllineitems",
    "enumeration",
    "descriptionlist",
    "definitionlist",
    "definition",
    )
 | |
| 
 | |
# Elements that end a paragraph when build_para() meets them, and that
# skip_leading_nodes() steps over when looking for paragraph material.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    # "memberdescni" was misspelled "membderdescni" and so never matched
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
 | |
| 
 | |
# Elements that skip_leading_nodes() steps over, in addition to
# PARA_LEVEL_ELEMENTS, when looking for the start of paragraph material.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem",
    "author",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    "versionadded",
    "versionchanged",
    "declaremodule",
    "modulesynopsis",
    "moduleauthor",
    "indexterm",
    "leader",
    )
 | |
| 
 | |
| 
 | |
def fixup_paras(doc, fragment):
    """Build paragraphs throughout fragment.

    The fixer is run on each direct child of fragment that is a paragraph
    container, and then on every <description> element in fragment.
    """
    for top in fragment.childNodes:
        if top.nodeName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, top)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
 | |
| 
 | |
| 
 | |
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the loose content of container in paragraph elements.

    Nested paragraph containers are handled recursively.  The recursive
    call does not pass depth on, so depth is whatever the outermost caller
    supplied; it is only looked at when DEBUG_PARA_FIXER is set.
    """
    # document is already normalized
    children = container.childNodes
    start = skip_leading_nodes(children)
    while start < len(children):
        if children[start].nodeName not in RECURSE_INTO_PARA_CONTAINERS:
            # Paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        else:
            # Something to recurse into:
            fixup_paras_helper(doc, children[start])
        start = skip_leading_nodes(children, start + 1)
 | |
| 
 | |
| 
 | |
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children, beginning at start, in a paragraph.

    The run extends from index start up to (not including) index i, but
    ends early at a blank line ("\\n\\n") inside a text node or at an
    element listed in PARA_LEVEL_ELEMENTS or RECURSE_INTO_PARA_CONTAINERS.
    Trailing whitespace is split off and left outside the paragraph.

    Returns the index in parent.childNodes just past the material that was
    inserted.  Raises ConversionError when no content could be collected.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == TEXT:
            pos = child.data.find("\n\n")
            if pos == 0:
                after = j
                break
            if pos >= 1:
                # keep the text before the blank line in this paragraph
                child.splitText(pos)
                break
    else:
        # ran off the end: the paragraph takes everything up to index i
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        stripped = data.rstrip()
        if stripped != data:
            have_last = 0
            child.splitText(len(stripped))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # move the collected nodes last-to-first, each one going in front of
    # the node moved before it, so the original order is kept
    for j in range(after - 1, start - 1, -1):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1
 | |
| 
 | |
| 
 | |
def skip_leading_nodes(children, start=0):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.

    A text node that starts with whitespace but also has other content is
    split in two as a side effect; the index of the non-whitespace part is
    returned.
    """
    skippable = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    total = len(children)
    while start < total:
        # skip over leading comments and whitespace:
        child = children[start]
        kind = child.nodeType
        if kind == TEXT:
            data = child.data
            stripped = data.lstrip()
            if stripped:
                if data == stripped:
                    return start
                # break into two nodes: whitespace and non-whitespace
                child.splitText(len(data) - len(stripped))
                return start + 1
            # all whitespace, just skip
        elif kind == ELEMENT:
            tag = child.tagName
            if tag in RECURSE_INTO_PARA_CONTAINERS or tag not in skippable:
                return start
        start = start + 1
    return start
 | |
| 
 | |
| 
 | |
def fixup_rfc_references(doc, fragment):
    """Append the text "RFC <num>" to each <rfc> element in fragment."""
    for rfcnode in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
 | |
| 
 | |
| 
 | |
def fixup_signatures(doc, fragment):
    """Flatten <optional> markup in every argument list below fragment.

    For each element child of fragment, all <args> descendants are fixed
    and normalized first, then all <constructor-args> descendants.
    """
    for child in fragment.childNodes:
        if child.nodeType != ELEMENT:
            continue
        for tag in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                arglist.normalize()
 | |
| 
 | |
| 
 | |
def fixup_args(doc, arglist):
    """Replace <optional> children of arglist with bracketed content.

    Each direct child named "optional" is removed; its children are moved
    into arglist at the same position, surrounded by "[" and "]" text nodes
    created from doc.  Nested <optional> elements become direct children
    when their wrapper is unwrapped and are picked up by the next scan.

    Always returns None.
    """
    while True:
        # Rescan from the top after each change: the child list was modified.
        optional = None
        for node in arglist.childNodes:
            if node.nodeName == "optional":
                optional = node
                break
        if optional is None:
            return
        arglist.insertBefore(doc.createTextNode("["), optional)
        contents = optional.childNodes
        while contents:
            first = contents[0]
            optional.removeChild(first)
            arglist.insertBefore(first, optional)
        arglist.insertBefore(doc.createTextNode("]"), optional)
        arglist.removeChild(optional)
 | |
| 
 | |
| 
 | |
def fixup_sectionauthors(doc, fragment):
    """Turn each <sectionauthor name="..."/> into an <author> element.

    The element is detached from its parent, renamed to "author", given its
    former name attribute as text content, and re-inserted into the same
    parent preceded by a "\n  " text node.  If the parent's second child is
    a <title>, the author goes in front of the parent's third child;
    otherwise it goes in front of the first child.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        set_tagName(author, "author")
        name = author.getAttribute("name")
        author.appendChild(doc.createTextNode(name))
        author.removeAttribute("name")
        siblings = section.childNodes
        # Default position: directly after the (expected) title.
        anchor = siblings[2]
        if siblings[1].nodeName != "title":
            anchor = siblings[0]
        section.insertBefore(doc.createTextNode("\n  "), anchor)
        section.insertBefore(author, anchor)
 | |
| 
 | |
| 
 | |
def fixup_verbatims(doc):
    """Rename <verbatim> elements that hold an interactive session.

    A <verbatim> whose first child is a text node starting (after leading
    whitespace) with ">>>" is renamed to "interactive-session".  Elements
    with no children are left alone.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        children = verbatim.childNodes
        if not children:
            # An empty element has no first child to inspect; indexing it
            # would raise IndexError and abort the whole conversion.
            continue
        first = children[0]
        if first.nodeType == TEXT and first.data.lstrip()[:3] == ">>>":
            set_tagName(verbatim, "interactive-session")
 | |
| 
 | |
| 
 | |
def add_node_ids(fragment, counter=0):
    """Assign a node_id attribute to fragment and everything below it.

    fragment itself receives counter.  The counter is advanced once for
    each child; element children are numbered recursively (and continue
    from the value the recursive call returns), any other child receives
    the current counter value directly.

    Returns the counter value after this subtree, plus one.
    """
    fragment.node_id = counter
    for child in fragment.childNodes:
        counter += 1
        if child.nodeType != ELEMENT:
            child.node_id = counter
        else:
            counter = add_node_ids(child, counter)
    return counter + 1
 | |
| 
 | |
| 
 | |
# Element names treated as module-index references by the
# fixup_refmodindexes*() functions.
REFMODINDEX_ELEMENTS = (
    "refmodindex",
    "refbimodindex",
    "refexmodindex",
    "refstmodindex",
)
 | |
| 
 | |
def fixup_refmodindexes(fragment):
    """Fold <ref*modindex> entries into neighbouring <module> elements.

    Locates every element named in REFMODINDEX_ELEMENTS below fragment and
    runs fixup_refmodindexes_chunk() once on each distinct parent of those
    elements.  Parents are de-duplicated by their node_id attribute, so
    node ids must already have been assigned (see add_node_ids()).
    """
    parents = {}
    for node in find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS):
        parent = node.parentNode
        parents[parent.node_id] = parent
    # An explicit loop rather than map(): the calls are made purely for
    # their side effects, and a lazy map() would never run them.
    for parent in parents.values():
        fixup_refmodindexes_chunk(parent)
 | |
| 
 | |
| 
 | |
def fixup_refmodindexes_chunk(container):
    """Merge module-index entries in container into <module> elements.

    For each element in container named in REFMODINDEX_ELEMENTS, every
    <module> element in container whose only child is a text node equal to
    the entry's module attribute gets index="yes".  Entries that matched at
    least one <module> are removed from the tree.  Entries that have
    children are reported on stderr and left untouched.
    """
    # The container is probably a <para>; report when it isn't.  getattr()
    # is used because the container is some node's parentNode and may be a
    # node without a tagName (e.g. a document fragment).
    if getattr(container, "tagName", None) != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
        return
    index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
    removes = []
    for entry in index_entries:
        if len(entry.childNodes) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        found = 0
        module_name = entry.getAttribute("module")
        for node in module_entries:
            kids = node.childNodes
            # Only a <module> holding exactly one text node can be compared
            # by name; anything else has no usable .data.
            if len(kids) != 1 or kids[0].nodeType != TEXT:
                continue
            if kids[0].data == module_name:
                found = 1
                node.setAttribute("index", "yes")
        if found:
            removes.append(entry)
    for node in removes:
        # Remove via the entry's actual parent: the entries were not
        # gathered with the child-only finder, so an entry is not
        # guaranteed to be a direct child of container.
        parent = node.parentNode
        if parent is not None:
            parent.removeChild(node)
 | |
| 
 | |
| 
 | |
def fixup_bifuncindexes(fragment):
    """Fold <bifuncindex> entries into neighbouring <function> elements.

    Finds every <bifuncindex> element below fragment and runs
    fixup_bifuncindexes_chunk() once on each distinct parent.  Parents are
    de-duplicated by their node_id attribute, so node ids must already
    have been assigned (see add_node_ids()).
    """
    parents = {}
    # Keying on node_id makes sure that each parent is only processed once.
    for node in find_all_elements(fragment, 'bifuncindex'):
        parent = node.parentNode
        parents[parent.node_id] = parent
    # An explicit loop rather than map(): the calls are made purely for
    # their side effects, and a lazy map() would never run them.
    for parent in parents.values():
        fixup_bifuncindexes_chunk(parent)
 | |
| 
 | |
| 
 | |
def fixup_bifuncindexes_chunk(container):
    """Merge <bifuncindex> children of container into <function> children.

    A <function> child matches an index entry when its first child's text
    is the entry's name attribute followed by "()".  Every matching
    <function> gets index="yes" and module="__builtin__"; an index entry
    with at least one match is removed from container.
    """
    index_entries = find_all_child_elements(container, "bifuncindex")
    functions = find_all_child_elements(container, "function")
    matched = []
    for entry in index_entries:
        wanted = entry.getAttribute("name")
        seen = 0
        for func in functions:
            text = func.childNodes[0].data
            if text[-2:] == "()" and text[:-2] == wanted:
                func.setAttribute("index", "yes")
                func.setAttribute("module", "__builtin__")
                if not seen:
                    # Queue each entry for removal only once, however many
                    # functions it matches.
                    seen = 1
                    matched.append(entry)
    for entry in matched:
        container.removeChild(entry)
 | |
| 
 | |
| 
 | |
def join_adjacent_elements(container, gi):
    """Merge runs of directly adjacent <gi> siblings anywhere below container.

    Whenever a child named gi is immediately followed by another child named
    gi, the second one's children are moved to the end of the first and the
    second is removed; this repeats until the following sibling is something
    else.  A message is written to stderr for every merge.  All element
    children, including the last child of each parent, are then searched in
    turn.
    """
    queue = [container]
    while queue:
        parent = queue.pop()
        children = parent.childNodes
        i = 0
        # Re-read len(children) on every pass: merging removes siblings, so
        # a length cached before the loop would run past the end.
        while i < len(children):
            child = children[i]
            if child.nodeName == gi:
                while i + 1 < len(children) and children[i+1].nodeName == gi:
                    ewrite("--- merging two <%s/> elements\n" % gi)
                    nextchild = children[i+1]
                    nextchildren = nextchild.childNodes
                    while len(nextchildren):
                        node = nextchildren[0]
                        nextchild.removeChild(node)
                        child.appendChild(node)
                    parent.removeChild(nextchild)
            if child.nodeType == ELEMENT:
                queue.append(child)
            i = i + 1
 | |
| 
 | |
| 
 | |
# Matches a value made of one ASCII letter followed by letters, digits, "."
# or "-" up to the end of the string.  write_esis() uses it to decide
# whether an attribute value is written as TOKEN or as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
 | |
| 
 | |
def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS event lines, recursively.

    doc        -- node whose childNodes are written
    ofp        -- output object with a write() method
    knownempty -- callable; true for element names that are declared empty

    Text nodes become "-" lines and entity references "&" lines.  An
    element is written as its "A" attribute lines, a "(" line, its
    children, and a ")" line; declared-empty elements are preceded by an
    "e" line.

    Raises ValueError if a declared-empty element has children and
    RuntimeError for any other node type.
    """
    for node in doc.childNodes:
        kind = node.nodeType
        if kind == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
            continue
        if kind == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.nodeName)
            continue
        if kind != ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = node.tagName
        if knownempty(gi):
            if node.hasChildNodes():
                raise ValueError(
                    "declared-empty node <%s> has children" % gi)
            ofp.write("e\n")
        for name, value in node.attributes.items():
            if _token_rx.match(value):
                dtype = "TOKEN"
            else:
                dtype = "CDATA"
            ofp.write("A%s %s %s\n" % (name, dtype, esistools.encode(value)))
        ofp.write("(%s\n" % gi)
        write_esis(node, ofp, knownempty)
        ofp.write(")%s\n" % gi)
 | |
| 
 | |
| 
 | |
def convert(ifp, ofp):
    """Read ESIS data from ifp, fix up the tree, and write ESIS to ofp.

    The input is parsed with esistools into a document fragment, the
    fix-up passes below are applied in this fixed order, and the result is
    dumped with write_esis().  An EPIPE error while writing is ignored;
    every other IOError propagates to the caller.
    """
    events = esistools.parse(ifp)
    toktype, doc = events.getEvent()
    fragment = doc.createDocumentFragment()
    events.expandNode(fragment)

    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    # The two index fix-ups below key on node_id, so ids come first.
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
    # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
    join_adjacent_elements(fragment, "option")
    #
    # Build the "declared empty" lookup from what the parser saw.  <author>
    # and <rfc> are dropped from it because fixup_sectionauthors() and
    # fixup_rfc_references() give those elements text content, which
    # write_esis() rejects for declared-empty elements.
    d = {}
    for gi in events.parser.get_empties():
        d[gi] = gi
    for gi in ("author", "rfc"):
        if d.has_key(gi):
            del d[gi]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The errno attribute is inspected instead of unpacking the
        # exception into (err, msg): unpacking fails with an unrelated
        # error when the IOError does not carry exactly two arguments.
        err = sys.exc_info()[1]
        if err.errno != errno.EPIPE:
            raise
 | |
| 
 | |
| 
 | |
def main():
    """Command-line entry point.

    With no arguments, convert stdin to stdout.  With one argument, read
    the named file and write to stdout.  With two arguments, read the first
    file and write the result to the second; the output is buffered in
    memory and the output file is only opened after the conversion has
    finished.  Any other argument count calls usage() and exits with
    status 2.
    """
    nargs = len(sys.argv)
    if nargs not in (1, 2, 3):
        usage()
        sys.exit(2)
    if nargs == 3:
        import StringIO
        ofp = StringIO.StringIO()
    else:
        ofp = sys.stdout
    if nargs == 1:
        ifp = sys.stdin
    else:
        ifp = open(sys.argv[1])
    try:
        convert(ifp, ofp)
    finally:
        # Close the input file we opened ourselves, even when the
        # conversion fails; stdin is left alone.
        if ifp is not sys.stdin:
            ifp.close()
    if nargs == 3:
        fp = open(sys.argv[2], "w")
        try:
            fp.write(ofp.getvalue())
        finally:
            fp.close()
        ofp.close()
 | |
| 
 | |
| 
 | |
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
 | 
