| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | #! /usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """Convert ESIS events to SGML or XML markup.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This is limited, but seems sufficient for the ESIS generated by the | 
					
						
							|  |  |  | latex2esis.py script when run over the Python documentation. | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # This should have an explicit option to indicate whether the *INPUT* was | 
					
						
							|  |  |  | # generated from an SGML or an XML application. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | import errno | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  | import esistools | 
					
						
							| 
									
										
										
										
											1999-01-19 23:03:04 +00:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | import re | 
					
						
							|  |  |  | import string | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-03-23 16:38:12 +00:00
										 |  |  | from xml.sax.saxutils import escape | 
					
						
							| 
									
										
										
										
											1999-01-14 17:06:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
# Default set of element names whose end tags convert() leaves out of SGML
# output; main() replaces it when --autoclose is given.
AUTOCLOSE = ()

# File updated by dump_empty_element_names(); it is only written when
# LIST_EMPTIES is true (main() sets it for the -e option).
EMPTIES_FILENAME = "../sgml/empties.dat"
LIST_EMPTIES = 0


# Lookup tables used with map_gi()/update_gi_map(): each maps a normalized
# name to the spelling that should appear in the output.
_elem_map = {}
_attr_map = {}
_token_map = {}

# Callable applied by map_gi() to a name before it is looked up in a table;
# main() rebinds it to a lower-casing function when name maps are supplied.
_normalize_case = str
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def map_gi(sgmlgi, map): | 
					
						
							|  |  |  |     uncased = _normalize_case(sgmlgi) | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         return map[uncased] | 
					
						
							|  |  |  |     except IndexError: | 
					
						
							|  |  |  |         map[uncased] = sgmlgi | 
					
						
							|  |  |  |         return sgmlgi | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def null_map_gi(sgmlgi, map): | 
					
						
							|  |  |  |     return sgmlgi | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  | def format_attrs(attrs, xml=0): | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     attrs = attrs.items() | 
					
						
							|  |  |  |     attrs.sort() | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     parts = [] | 
					
						
							|  |  |  |     append = parts.append | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     for name, value in attrs: | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  |         if xml: | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |             append('%s="%s"' % (name, escape(value))) | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  |         else: | 
					
						
							|  |  |  |             # this is a little bogus, but should do for now | 
					
						
							|  |  |  |             if name == value and isnmtoken(value): | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |                 append(value) | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  |             elif istoken(value): | 
					
						
							| 
									
										
										
										
											1999-01-29 21:35:50 +00:00
										 |  |  |                 if value == "no" + name: | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |                     append(value) | 
					
						
							| 
									
										
										
										
											1999-01-29 21:35:50 +00:00
										 |  |  |                 else: | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |                     append("%s=%s" % (name, value)) | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |                 append('%s="%s"' % (name, escape(value))) | 
					
						
							|  |  |  |     if parts: | 
					
						
							|  |  |  |         parts.insert(0, '') | 
					
						
							|  |  |  |     return string.join(parts) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-19 23:03:04 +00:00
										 |  |  | _nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE) | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  | def isnmtoken(s): | 
					
						
							|  |  |  |     return _nmtoken_rx.match(s) is not None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-19 23:03:04 +00:00
										 |  |  | _token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE) | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  | def istoken(s): | 
					
						
							|  |  |  |     return _token_rx.match(s) is not None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:03:52 +00:00
										 |  |  | def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()): | 
					
						
							| 
									
										
										
										
											1999-01-20 20:35:05 +00:00
										 |  |  |     if xml: | 
					
						
							|  |  |  |         autoclose = () | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     attrs = {} | 
					
						
							|  |  |  |     lastopened = None | 
					
						
							| 
									
										
										
										
											1998-12-10 18:31:37 +00:00
										 |  |  |     knownempties = [] | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     knownempty = 0 | 
					
						
							|  |  |  |     lastempty = 0 | 
					
						
							| 
									
										
										
										
											1999-05-18 17:34:51 +00:00
										 |  |  |     inverbatim = 0 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     while 1: | 
					
						
							|  |  |  |         line = ifp.readline() | 
					
						
							|  |  |  |         if not line: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         type = line[0] | 
					
						
							|  |  |  |         data = line[1:] | 
					
						
							|  |  |  |         if data and data[-1] == "\n": | 
					
						
							|  |  |  |             data = data[:-1] | 
					
						
							|  |  |  |         if type == "-": | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |             data = esistools.decode(data) | 
					
						
							| 
									
										
										
										
											1999-05-18 17:34:51 +00:00
										 |  |  |             data = escape(data) | 
					
						
							|  |  |  |             if not inverbatim: | 
					
						
							|  |  |  |                 data = string.replace(data, "---", "—") | 
					
						
							|  |  |  |             ofp.write(data) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |             if "\n" in data: | 
					
						
							|  |  |  |                 lastopened = None | 
					
						
							|  |  |  |             knownempty = 0 | 
					
						
							|  |  |  |             lastempty = 0 | 
					
						
							|  |  |  |         elif type == "(": | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |             if data == "COMMENT": | 
					
						
							|  |  |  |                 ofp.write("<!--") | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |             data = map_gi(data, _elem_map) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |             if knownempty and xml: | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  |                 ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml))) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											1999-01-19 17:10:31 +00:00
										 |  |  |                 ofp.write("<%s%s>" % (data, format_attrs(attrs, xml))) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |             if knownempty and data not in knownempties: | 
					
						
							|  |  |  |                 # accumulate knowledge! | 
					
						
							|  |  |  |                 knownempties.append(data) | 
					
						
							|  |  |  |             attrs = {} | 
					
						
							|  |  |  |             lastopened = data | 
					
						
							|  |  |  |             lastempty = knownempty | 
					
						
							|  |  |  |             knownempty = 0 | 
					
						
							| 
									
										
										
										
											1999-05-18 17:34:51 +00:00
										 |  |  |             inverbatim = data in verbatims | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |         elif type == ")": | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |             if data == "COMMENT": | 
					
						
							|  |  |  |                 ofp.write("-->") | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |             data = map_gi(data, _elem_map) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |             if xml: | 
					
						
							|  |  |  |                 if not lastempty: | 
					
						
							|  |  |  |                     ofp.write("</%s>" % data) | 
					
						
							|  |  |  |             elif data not in knownempties: | 
					
						
							| 
									
										
										
										
											1999-01-20 20:35:05 +00:00
										 |  |  |                 if data in autoclose: | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  |                 elif lastopened == data: | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |                     ofp.write("</>") | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     ofp.write("</%s>" % data) | 
					
						
							|  |  |  |             lastopened = None | 
					
						
							|  |  |  |             lastempty = 0 | 
					
						
							| 
									
										
										
										
											1999-05-18 17:34:51 +00:00
										 |  |  |             inverbatim = 0 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |         elif type == "A": | 
					
						
							|  |  |  |             name, type, value = string.split(data, " ", 2) | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |             name = map_gi(name, _attr_map) | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |             attrs[name] = esistools.decode(value) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |         elif type == "e": | 
					
						
							|  |  |  |             knownempty = 1 | 
					
						
							| 
									
										
										
										
											1999-08-26 17:50:26 +00:00
										 |  |  |         elif type == "&": | 
					
						
							|  |  |  |             ofp.write("&%s;" % data) | 
					
						
							|  |  |  |             knownempty = 0 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             raise RuntimeError, "unrecognized ESIS event type: '%s'" % type | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-19 23:03:04 +00:00
										 |  |  |     if LIST_EMPTIES: | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |         dump_empty_element_names(knownempties) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def dump_empty_element_names(knownempties): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:03:52 +00:00
										 |  |  |     d = {} | 
					
						
							|  |  |  |     for gi in knownempties: | 
					
						
							|  |  |  |         d[gi] = gi | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     knownempties.append("") | 
					
						
							|  |  |  |     if os.path.isfile(EMPTIES_FILENAME): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:03:52 +00:00
										 |  |  |         fp = open(EMPTIES_FILENAME) | 
					
						
							|  |  |  |         while 1: | 
					
						
							|  |  |  |             line = fp.readline() | 
					
						
							|  |  |  |             if not line: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             gi = string.strip(line) | 
					
						
							|  |  |  |             if gi: | 
					
						
							|  |  |  |                 d[gi] = gi | 
					
						
							|  |  |  |     fp = open(EMPTIES_FILENAME, "w") | 
					
						
							|  |  |  |     gilist = d.keys() | 
					
						
							|  |  |  |     gilist.sort() | 
					
						
							|  |  |  |     fp.write(string.join(gilist, "\n")) | 
					
						
							|  |  |  |     fp.write("\n") | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     fp.close() | 
					
						
							| 
									
										
										
										
											1999-01-19 23:03:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  | def update_gi_map(map, names, fromsgml=1): | 
					
						
							|  |  |  |     for name in string.split(names, ","): | 
					
						
							|  |  |  |         if fromsgml: | 
					
						
							|  |  |  |             uncased = string.lower(name) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             uncased = name | 
					
						
							|  |  |  |         map[uncased] = name | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |     import getopt | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     import sys | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											1999-01-20 20:35:05 +00:00
										 |  |  |     autoclose = AUTOCLOSE | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     xml = 1 | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |     xmldecl = 0 | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     elem_names = '' | 
					
						
							|  |  |  |     attr_names = '' | 
					
						
							|  |  |  |     value_names = '' | 
					
						
							| 
									
										
										
										
											1999-05-18 17:34:51 +00:00
										 |  |  |     verbatims = ('verbatim', 'interactive-session') | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     opts, args = getopt.getopt(sys.argv[1:], "adesx", | 
					
						
							|  |  |  |                                ["autoclose=", "declare", "sgml", "xml", | 
					
						
							|  |  |  |                                 "elements-map=", "attributes-map", | 
					
						
							|  |  |  |                                 "values-map="]) | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |     for opt, arg in opts: | 
					
						
							|  |  |  |         if opt in ("-d", "--declare"): | 
					
						
							|  |  |  |             xmldecl = 1 | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |         elif opt == "-e": | 
					
						
							|  |  |  |             global LIST_EMPTIES | 
					
						
							|  |  |  |             LIST_EMPTIES = 1 | 
					
						
							|  |  |  |         elif opt in ("-s", "--sgml"): | 
					
						
							|  |  |  |             xml = 0 | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |         elif opt in ("-x", "--xml"): | 
					
						
							|  |  |  |             xml = 1 | 
					
						
							| 
									
										
										
										
											1999-01-20 20:35:05 +00:00
										 |  |  |         elif opt in ("-a", "--autoclose"): | 
					
						
							|  |  |  |             autoclose = string.split(arg, ",") | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |         elif opt == "--elements-map": | 
					
						
							|  |  |  |             elem_names = ("%s,%s" % (elem_names, arg))[1:] | 
					
						
							|  |  |  |         elif opt == "--attributes-map": | 
					
						
							|  |  |  |             attr_names = ("%s,%s" % (attr_names, arg))[1:] | 
					
						
							|  |  |  |         elif opt == "--values-map": | 
					
						
							|  |  |  |             value_names = ("%s,%s" % (value_names, arg))[1:] | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # open input streams: | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |     if len(args) == 0: | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |         ifp = sys.stdin | 
					
						
							|  |  |  |         ofp = sys.stdout | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |     elif len(args) == 1: | 
					
						
							|  |  |  |         ifp = open(args[0]) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |         ofp = sys.stdout | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |     elif len(args) == 2: | 
					
						
							|  |  |  |         ifp = open(args[0]) | 
					
						
							|  |  |  |         ofp = open(args[1], "w") | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     else: | 
					
						
							|  |  |  |         usage() | 
					
						
							|  |  |  |         sys.exit(2) | 
					
						
							| 
									
										
										
										
											1999-02-18 16:30:16 +00:00
										 |  |  |     # | 
					
						
							|  |  |  |     # setup the name maps: | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if elem_names or attr_names or value_names: | 
					
						
							|  |  |  |         # assume the origin was SGML; ignore case of the names from the ESIS | 
					
						
							|  |  |  |         # stream but set up conversion tables to get the case right on output | 
					
						
							|  |  |  |         global _normalize_case | 
					
						
							|  |  |  |         _normalize_case = string.lower | 
					
						
							|  |  |  |         update_gi_map(_elem_map, string.split(elem_names, ",")) | 
					
						
							|  |  |  |         update_gi_map(_attr_map, string.split(attr_names, ",")) | 
					
						
							|  |  |  |         update_gi_map(_values_map, string.split(value_names, ",")) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         global map_gi | 
					
						
							|  |  |  |         map_gi = null_map_gi | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # run the conversion: | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											1998-12-01 19:01:53 +00:00
										 |  |  |         if xml and xmldecl: | 
					
						
							|  |  |  |             opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n') | 
					
						
							| 
									
										
										
										
											1999-07-29 22:03:52 +00:00
										 |  |  |         convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |     except IOError, (err, msg): | 
					
						
							|  |  |  |         if err != errno.EPIPE: | 
					
						
							|  |  |  |             raise | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()