mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			328 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			328 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #! /usr/bin/env python
 | |
| 
 | |
| """Promote the IDs from <label/> elements to the enclosing section / chapter /
 | |
| whatever, then remove the <label/> elements.  This allows *ML style internal
 | |
| linking rather than the bogus LaTeX model.
 | |
| 
 | |
| Note that <label/>s in <title> elements are promoted two steps, since the
 | |
| <title> elements are artificially created from the section parameter, and the
 | |
| label really refers to the sectioning construct.
 | |
| """
 | |
| __version__ = '$Revision$'
 | |
| 
 | |
| 
 | |
| import errno
 | |
| import esistools
 | |
| import re
 | |
| import string
 | |
| import sys
 | |
| import xml.dom.core
 | |
| import xml.dom.esis_builder
 | |
| 
 | |
| 
 | |
| # Workaround to deal with invalid documents (multiple root elements).  This
 | |
| # does not indicate a bug in the DOM implementation.
 | |
| #
 | |
def get_documentElement(self):
    """Return an Element wrapper for the document's last element child.

    Installed on xml.dom.core.Document as a workaround for invalid
    documents that carry more than one root element.  Every child of the
    underlying node whose type is ELEMENT is wrapped in turn, so when
    several are present the last one is returned.  Returns None when the
    document has no element children.
    """
    wrapped = None
    for raw in self._node.children:
        if raw.type != xml.dom.core.ELEMENT:
            continue
        # The document is passed for both remaining constructor arguments.
        wrapped = xml.dom.core.Element(raw, self, self)
    return wrapped


xml.dom.core.Document.get_documentElement = get_documentElement
 | |
| 
 | |
| 
 | |
| # Replace get_childNodes for the Document class; without this, children
 | |
| # accessed from the Document object via .childNodes (no matter how many
 | |
| # levels of access are used) will be given an ownerDocument of None.
 | |
| #
 | |
def get_childNodes(self):
    """Return a NodeList over the children of the document's raw node.

    Installed on xml.dom.core.Document.  The document itself is handed to
    the NodeList for both remaining constructor arguments, so that nodes
    reached through .childNodes do not end up with an ownerDocument of
    None.
    """
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)


xml.dom.core.Document.get_childNodes = get_childNodes
 | |
| 
 | |
| 
 | |
def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`.

    Only the immediate children of `doc` are examined.  Returns None when
    no child element has that tag name.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            return child
    return None
 | |
| 
 | |
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of `doc` named `gi`.

    Returns None, leaving `doc` untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
 | |
| 
 | |
| 
 | |
def simplify(doc):
    """Rework the top level of `doc` into a more rational shape.

    The incoming documents are not valid SGML/XML as they stand.  This
    pass:

    * removes a top-level <documentclass> element and uses its
      "classname" attribute (default "document") as the new name of the
      top-level <document> element;
    * removes the top-level <title> and every top-level <input> element
      and re-inserts them, in that order and each followed by a newline,
      at the start of the document element;
    * strips text nodes from the front of the document.
    """
    classname = "document"
    relocated = []
    found = extract_first_element(doc, "documentclass")
    if found is not None:
        classname = found.getAttribute("classname")
    found = extract_first_element(doc, "title")
    if found is not None:
        relocated.append(found)
    # Rename the root element after the document class.
    root = get_first_element(doc, "document")
    if root is not None:
        root._node.name = classname
    # Pull out every top-level <input>, in document order.
    found = extract_first_element(doc, "input")
    while found is not None:
        relocated.append(found)
        found = extract_first_element(doc, "input")
    if relocated:
        docelem = doc.documentElement
        # Each node is inserted at the very front, so walk the list
        # backwards to end up with the original relative order.
        relocated.reverse()
        for item in relocated:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(item, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Drop text that precedes the first non-text node of the document.
    while doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
 | |
| 
 | |
| 
 | |
def cleanup_root_text(doc):
    """Remove top-level text nodes, except those right after a COMMENT.

    A text node that is a direct child of `doc` is kept only when the
    node immediately before it is an element named "COMMENT"; every other
    top-level text node is removed.  Removal happens after the scan is
    finished.
    """
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        keep_text = after_comment
        after_comment = 0
        if child.nodeType == xml.dom.core.TEXT:
            if not keep_text:
                doomed.append(child)
        elif child.nodeType == xml.dom.core.ELEMENT \
             and child.tagName == "COMMENT":
            after_comment = 1
    for child in doomed:
        doc.removeChild(child)
 | |
| 
 | |
| 
 | |
def rewrite_desc_entries(doc, argname_gi):
    """Wrap the content around each `argname_gi` element in <description>.

    For every element named `argname_gi` in `doc`, all siblings that are
    not themselves `argname_gi` elements are moved into a newly created
    <description> element, which is appended to the parent.  An
    `argname_gi` element that has children is kept (preceded by a newline
    and indent); one without children is removed.
    """
    # NOTE(review): this function calls doc.createText() while other
    # functions in this file call doc.createTextNode() -- confirm that the
    # DOM implementation in use provides both.
    for argnode in doc.getElementsByTagName(argname_gi):
        parent = argnode.parentNode
        movers = []
        for sibling in parent.childNodes:
            if not (sibling.nodeType == xml.dom.core.ELEMENT
                    and sibling.tagName == argname_gi):
                movers.append(sibling)
        desc = doc.createElement("description")
        for sibling in movers:
            parent.removeChild(sibling)
            desc.appendChild(sibling)
        if not argnode.childNodes:
            # no arguments, remove the empty element
            parent.removeChild(argnode)
        else:
            # keep the arguments element; add newline & indent before it
            parent.insertBefore(doc.createText("\n  "), argnode)
        parent.appendChild(doc.createText("\n  "))
        parent.appendChild(desc)
        parent.appendChild(doc.createText("\n"))
 | |
| 
 | |
def handle_args(doc):
    """Apply rewrite_desc_entries() for <args>, then <constructor-args>."""
    for gi in ("args", "constructor-args"):
        rewrite_desc_entries(doc, gi)
 | |
| 
 | |
| 
 | |
def handle_appendix(doc):
    """Move everything after the <appendix/> marker into <back-matter>.

    Must be called after simplify() if the document is multi-rooted to
    begin with.

    The children of the document element are scanned in order.  The first
    child element that contains an <appendix> descendant has that marker
    removed (and the marker's former parent normalized); every child of
    the document element after that one is then moved into a new
    <back-matter> element appended to the document element.  Leading
    whitespace-only text nodes are dropped and not moved.  Nothing
    is added when no children follow the marker.
    """
    docelem = doc.documentElement
    seen_appendix = 0
    nodes = []
    for node in docelem.childNodes:
        if seen_appendix:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                seen_appendix = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops: these calls are made purely for their side
        # effects, so there is no result list worth building.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # Skip whitespace-only text at the front of the moved content.
        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
 | |
| 
 | |
| 
 | |
def handle_labels(doc):
    """Promote <label id="..."/> IDs to the enclosing element.

    Each <label> with a non-empty "id" attribute has that ID copied to
    its parent element -- or to its grandparent when the parent is a
    <title> -- and is then removed from its parent.  Labels without an ID
    are left in place.
    """
    for label in doc.getElementsByTagName("label"):
        label_id = label.getAttribute("id")
        if label_id:
            holder = label.parentNode
            target = holder
            if holder.tagName == "title":
                # The ID belongs on the sectioning element, not the title.
                target = holder.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from its parent:
            holder.removeChild(label)
 | |
| 
 | |
| 
 | |
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    `wsmap` maps element names to the whitespace string each such element
    should end with.  The tree is walked breadth-first from `doc`; for
    every element named in `wsmap` whose last child is a text node, that
    text is right-stripped and the mapped whitespace appended.  For a
    <title> whose parent starts with an element, a newline-and-indent
    text node is additionally inserted at the front of the parent.
    """
    pending = [doc]
    while pending:
        node = pending.pop(0)
        if node.nodeType == xml.dom.core.ELEMENT:
            if wsmap.has_key(node.tagName):
                ws = wsmap[node.tagName]
                # Reverse the child list so the last child sits at index 0,
                # then restore the order afterwards.
                # NOTE(review): indexing [0] presumably fails for an
                # element with no children -- confirm callers never hit it.
                children = node.childNodes
                children.reverse()
                tail = children[0]
                if tail.nodeType == xml.dom.core.TEXT:
                    tail.data = string.rstrip(tail.data) + ws
                children.reverse()
                # hack to get the title in place:
                if node.tagName == "title":
                    parent = node.parentNode
                    if parent.firstChild.nodeType == xml.dom.core.ELEMENT:
                        parent.insertBefore(doc.createText("\n  "),
                                            parent.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                pending.append(child)
 | |
| 
 | |
| 
 | |
def normalize(doc):
    """Call normalize() on every element that is a direct child of `doc`."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        child.normalize()
 | |
| 
 | |
| 
 | |
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    The tree below `doc` is walked breadth-first.  An element whose name
    is in `element_names` and whose only child is a text node ending in
    "()" has those two characters removed.  The children of such named
    elements are not visited; all other elements are descended into.
    """
    wanted = {}
    for gi in element_names:
        wanted[gi] = gi
    is_target = wanted.has_key
    pending = []
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            pending.append(child)
    while pending:
        node = pending.pop(0)
        if not is_target(node.tagName):
            for child in node.childNodes:
                if child.nodeType == xml.dom.core.ELEMENT:
                    pending.append(child)
            continue
        children = node.childNodes
        if len(children) == 1 \
           and children[0].nodeType == xml.dom.core.TEXT:
            text = children[0].data
            if text[-2:] == "()":
                children[0].data = text[:-2]
 | |
| 
 | |
| 
 | |
def cleanup_synopses(doc):
    """Rename <modulesynopsis> to <synopsis> and move it up in its section.

    Every <modulesynopsis> element is renamed to "synopsis".  When its
    parent is a <section>, the element is also removed and re-inserted
    before the entry at index 2 of the parent's child list, preceded by a
    newline-and-indent text node.

    Actually, this should build a "moduleinfo" element from various parts
    of the meta-information in the section.  <moduleinfo> needs some
    design work before we can really do anything real.
    """
    for synopsis in doc.getElementsByTagName("modulesynopsis"):
        synopsis._node.name = "synopsis"
        container = synopsis.parentNode
        if container.tagName != "section":
            continue
        # The child list is fetched before the removal, as in the original
        # statement order.
        siblings = container.childNodes
        container.removeChild(synopsis)
        container.insertBefore(synopsis, siblings[2])
        indent = doc.createTextNode("\n  ")
        container.insertBefore(indent, synopsis)
 | |
| 
 | |
| 
 | |
# Attribute values matching this pattern are written as TOKEN, all others
# as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")


def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS event lines.

    doc        -- node whose childNodes are serialized (recursively).
    ofp        -- file-like object; only its write() method is used.
    knownempty -- callable taking an element name and returning true when
                  the element is declared empty.

    Output per element: an "e" line when the element is declared empty,
    one "A<name> <TOKEN|CDATA> <value>" line per attribute, "(<name>",
    the element's content, then ")<name>".  Text nodes produce
    "-<data>".  Attribute values and text are passed through
    esistools.encode().

    Raises ValueError when a declared-empty element has children, and
    RuntimeError for any node that is neither an element nor text.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    # Call form of raise: valid on every Python version,
                    # unlike the old "raise Class, value" statement.
                    raise ValueError("declared-empty node has children")
                ofp.write("e\n")
            for k, v in node.attributes.items():
                value = v.value
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        else:
            raise RuntimeError("unsupported node type: %s" % nodeType)
 | |
| 
 | |
| 
 | |
def convert(ifp, ofp):
    """Read an ESIS stream from `ifp`, fix up the document, write to `ofp`.

    ifp -- file-like object; its entire content is read and fed to an
           esistools.ExtendedEsisBuilder.
    ofp -- file-like object that receives the rewritten ESIS output.

    The fix-up passes run in a fixed order (simplify() has to precede
    handle_appendix() for multi-rooted documents).  An IOError with errno
    EPIPE raised while writing is ignored; any other IOError propagates.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    # Build a name -> name table of the elements the builder reported as
    # empty, and use its lookup as the predicate for write_esis().
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The exception is fetched via sys.exc_info() and its errno
        # attribute inspected, instead of unpacking it into two names:
        # unpacking breaks for an IOError that does not carry exactly two
        # arguments and would hide the real error.
        err = sys.exc_info()[1]
        if err.errno != errno.EPIPE:
            raise
 | |
| 
 | |
| 
 | |
def usage():
    """Write a one-line command synopsis to standard error."""
    sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])


def main():
    """Command-line entry point.

    With no arguments, convert standard input to standard output.  With
    one argument, read the named file and write to standard output.  With
    two, read the first named file and write the second.  Any other
    argument count prints a usage message and exits with status 2.
    """
    if len(sys.argv) == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        # usage() was previously called without being defined anywhere,
        # so this branch died with NameError and never reached exit(2).
        usage()
        sys.exit(2)
    convert(ifp, ofp)


if __name__ == "__main__":
    main()
 | 
