| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | #! /usr/bin/env python | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-19 17:37:37 +00:00
										 |  |  |  | """Generate ESIS events based on a LaTeX source document and
 | 
					
						
							|  |  |  |  | configuration data. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | The conversion is not strong enough to work with arbitrary LaTeX | 
					
						
							|  |  |  |  | documents; it has only been designed to work with the highly stylized | 
					
						
							|  |  |  |  | markup used in the standard Python documentation.  A lot of | 
					
						
							|  |  |  |  | information about specific markup is encoded in the control table | 
					
						
							|  |  |  |  | passed to the convert() function; changing this table can allow this | 
					
						
							|  |  |  |  | tool to support additional LaTeX markups. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | The format of the table is largely undocumented; see the commented | 
					
						
							|  |  |  |  | headers where the table is specified in main().  There is no provision  | 
					
						
							|  |  |  |  | to load an alternate table from an external file. | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | """
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import errno | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | import getopt | 
					
						
							|  |  |  |  | import os | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | import re | 
					
						
							|  |  |  |  | import string | 
					
						
							|  |  |  |  | import sys | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | import UserList | 
					
						
							| 
									
										
										
										
											2000-11-22 17:56:43 +00:00
										 |  |  |  | import xml.sax.saxutils | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-10 19:36:03 +00:00
										 |  |  |  | from types import ListType, StringType, TupleType | 
					
						
							| 
									
										
										
										
											1998-12-01 19:04:12 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | try: | 
					
						
							|  |  |  |  |     from xml.parsers.xmllib import XMLParser | 
					
						
							|  |  |  |  | except ImportError: | 
					
						
							|  |  |  |  |     from xmllib import XMLParser | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  | from esistools import encode | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
# Debug switch: when true, dbgmsg() writes its messages to stderr and
# new_stack() returns a type-checking _Stack instead of a plain list.
DEBUG = 0
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | class LaTeXFormatError(Exception): | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  |     pass | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | class LaTeXStackError(LaTeXFormatError): | 
					
						
							|  |  |  |  |     def __init__(self, found, stack): | 
					
						
							|  |  |  |  |         msg = "environment close for %s doesn't match;\n  stack = %s" \ | 
					
						
							|  |  |  |  |               % (found, stack) | 
					
						
							|  |  |  |  |         self.found = found | 
					
						
							|  |  |  |  |         self.stack = stack[:] | 
					
						
							|  |  |  |  |         LaTeXFormatError.__init__(self, msg) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |  | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
# "\begin{name}" / "\end{name}"; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# A macro: backslash, letters and an optional "*" (group 1), an optional
# single space, then either an opening brace or trailing whitespace (group 2).
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
# A comment: one or more "%", an optional space, the rest of the line
# (group 1), the newline, and any leading blanks/tabs on the following line.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of ordinary text: anything except "]", "~", "%", backslash and braces.
_text_rx = re.compile(r"[^]~%\\{}]+")
# An optional "[...]" argument, possibly preceded by whitespace; group 1 is
# the bracket contents (which may not contain "]").
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE)
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
# (only one level of nested braces is recognized).
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# A name token: a letter followed by letters, digits, "." or "-", anchored
# at the end of the string.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Optional spaces/newlines followed by "{" or "[" respectively.
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters which, when preceded by a backslash, are emitted literally as
# character data by Conversion.subconvert().
ESCAPED_CHARS = "$%#^ {}&~"
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-05-07 21:12:21 +00:00
										 |  |  |  | def dbgmsg(msg): | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
										 |  |  |  |     if DEBUG: | 
					
						
							| 
									
										
										
										
											1999-05-07 21:12:21 +00:00
										 |  |  |  |         sys.stderr.write(msg + "\n") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def pushing(name, point, depth): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     dbgmsg("pushing <%s> at %s" % (name, point)) | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | def popping(name, point, depth): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     dbgmsg("popping </%s> at %s" % (name, point)) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class _Stack(UserList.UserList): | 
					
						
							|  |  |  |  |     def append(self, entry): | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |         if type(entry) is not StringType: | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |             raise LaTeXFormatError("cannot push non-string on stack: " | 
					
						
							|  |  |  |  |                                    + `entry`) | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  |         #dbgmsg("%s<%s>" % (" "*len(self.data), entry)) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |         self.data.append(entry) | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     def pop(self, index=-1): | 
					
						
							|  |  |  |  |         entry = self.data[index] | 
					
						
							|  |  |  |  |         del self.data[index] | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  |         #dbgmsg("%s</%s>" % (" "*len(self.data), entry)) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def __delitem__(self, index): | 
					
						
							|  |  |  |  |         entry = self.data[index] | 
					
						
							|  |  |  |  |         del self.data[index] | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  |         #dbgmsg("%s</%s>" % (" "*len(self.data), entry)) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def new_stack(): | 
					
						
							|  |  |  |  |     if DEBUG: | 
					
						
							|  |  |  |  |         return _Stack() | 
					
						
							|  |  |  |  |     return [] | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |  | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  | class Conversion: | 
					
						
							|  |  |  |  |     def __init__(self, ifp, ofp, table): | 
					
						
							|  |  |  |  |         self.write = ofp.write | 
					
						
							|  |  |  |  |         self.ofp = ofp | 
					
						
							| 
									
										
										
										
											1999-05-07 19:59:02 +00:00
										 |  |  |  |         self.table = table | 
					
						
							| 
									
										
										
										
											2001-11-19 05:27:40 +00:00
										 |  |  |  |         L = [s.rstrip() for s in ifp.readlines()] | 
					
						
							|  |  |  |  |         L.append("") | 
					
						
							|  |  |  |  |         self.line = string.join(L, "\n") | 
					
						
							| 
									
										
										
										
											1999-05-07 19:59:02 +00:00
										 |  |  |  |         self.preamble = 1 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     def convert(self): | 
					
						
							|  |  |  |  |         self.subconvert() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def subconvert(self, endchar=None, depth=0): | 
					
						
							|  |  |  |  |         # | 
					
						
							|  |  |  |  |         # Parses content, including sub-structures, until the character | 
					
						
							|  |  |  |  |         # 'endchar' is found (with no open structures), or until the end | 
					
						
							|  |  |  |  |         # of the input data is endchar is None. | 
					
						
							|  |  |  |  |         # | 
					
						
							|  |  |  |  |         stack = new_stack() | 
					
						
							|  |  |  |  |         line = self.line | 
					
						
							|  |  |  |  |         while line: | 
					
						
							|  |  |  |  |             if line[0] == endchar and not stack: | 
					
						
							|  |  |  |  |                 self.line = line | 
					
						
							|  |  |  |  |                 return line | 
					
						
							|  |  |  |  |             m = _comment_rx.match(line) | 
					
						
							|  |  |  |  |             if m: | 
					
						
							|  |  |  |  |                 text = m.group(1) | 
					
						
							|  |  |  |  |                 if text: | 
					
						
							|  |  |  |  |                     self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" | 
					
						
							|  |  |  |  |                                % encode(text)) | 
					
						
							|  |  |  |  |                 line = line[m.end():] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             m = _begin_env_rx.match(line) | 
					
						
							|  |  |  |  |             if m: | 
					
						
							|  |  |  |  |                 name = m.group(1) | 
					
						
							|  |  |  |  |                 entry = self.get_env_entry(name) | 
					
						
							|  |  |  |  |                 # re-write to use the macro handler | 
					
						
							|  |  |  |  |                 line = r"\%s %s" % (name, line[m.end():]) | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             m = _end_env_rx.match(line) | 
					
						
							|  |  |  |  |             if m: | 
					
						
							|  |  |  |  |                 # end of environment | 
					
						
							|  |  |  |  |                 envname = m.group(1) | 
					
						
							|  |  |  |  |                 entry = self.get_entry(envname) | 
					
						
							|  |  |  |  |                 while stack and envname != stack[-1] \ | 
					
						
							|  |  |  |  |                       and stack[-1] in entry.endcloses: | 
					
						
							|  |  |  |  |                     self.write(")%s\n" % stack.pop()) | 
					
						
							|  |  |  |  |                 if stack and envname == stack[-1]: | 
					
						
							|  |  |  |  |                     self.write(")%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                     del stack[-1] | 
					
						
							|  |  |  |  |                 else: | 
					
						
							|  |  |  |  |                     raise LaTeXStackError(envname, stack) | 
					
						
							|  |  |  |  |                 line = line[m.end():] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             m = _begin_macro_rx.match(line) | 
					
						
							|  |  |  |  |             if m: | 
					
						
							|  |  |  |  |                 # start of macro | 
					
						
							|  |  |  |  |                 macroname = m.group(1) | 
					
						
							| 
									
										
										
										
											2000-11-22 17:56:43 +00:00
										 |  |  |  |                 if macroname == "c": | 
					
						
							|  |  |  |  |                     # Ugh!  This is a combining character... | 
					
						
							|  |  |  |  |                     endpos = m.end() | 
					
						
							|  |  |  |  |                     self.combining_char("c", line[endpos]) | 
					
						
							|  |  |  |  |                     line = line[endpos + 1:] | 
					
						
							|  |  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                 entry = self.get_entry(macroname) | 
					
						
							|  |  |  |  |                 if entry.verbatim: | 
					
						
							|  |  |  |  |                     # magic case! | 
					
						
							| 
									
										
										
										
											2001-09-28 16:26:13 +00:00
										 |  |  |  |                     pos = line.find("\\end{%s}" % macroname) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                     text = line[m.end(1):pos] | 
					
						
							|  |  |  |  |                     stack.append(entry.name) | 
					
						
							|  |  |  |  |                     self.write("(%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                     self.write("-%s\n" % encode(text)) | 
					
						
							|  |  |  |  |                     self.write(")%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                     stack.pop() | 
					
						
							|  |  |  |  |                     line = line[pos + len("\\end{%s}" % macroname):] | 
					
						
							|  |  |  |  |                     continue | 
					
						
							|  |  |  |  |                 while stack and stack[-1] in entry.closes: | 
					
						
							|  |  |  |  |                     top = stack.pop() | 
					
						
							|  |  |  |  |                     topentry = self.get_entry(top) | 
					
						
							|  |  |  |  |                     if topentry.outputname: | 
					
						
							|  |  |  |  |                         self.write(")%s\n-\\n\n" % topentry.outputname) | 
					
						
							|  |  |  |  |                 # | 
					
						
							| 
									
										
										
										
											2001-09-25 20:57:36 +00:00
										 |  |  |  |                 if entry.outputname and entry.empty: | 
					
						
							|  |  |  |  |                     self.write("e\n") | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                 # | 
					
						
							| 
									
										
										
										
											2001-09-25 20:57:36 +00:00
										 |  |  |  |                 params, optional, empty = self.start_macro(macroname) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                 # rip off the macroname | 
					
						
							|  |  |  |  |                 if params: | 
					
						
							|  |  |  |  |                     line = line[m.end(1):] | 
					
						
							|  |  |  |  |                 elif empty: | 
					
						
							|  |  |  |  |                     line = line[m.end(1):] | 
					
						
							|  |  |  |  |                 else: | 
					
						
							|  |  |  |  |                     line = line[m.end():] | 
					
						
							|  |  |  |  |                 opened = 0 | 
					
						
							|  |  |  |  |                 implied_content = 0 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |                 # handle attribute mappings here: | 
					
						
							|  |  |  |  |                 for pentry in params: | 
					
						
							|  |  |  |  |                     if pentry.type == "attribute": | 
					
						
							|  |  |  |  |                         if pentry.optional: | 
					
						
							|  |  |  |  |                             m = _optional_rx.match(line) | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |                             if m and entry.outputname: | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                                 line = line[m.end():] | 
					
						
							|  |  |  |  |                                 self.dump_attr(pentry, m.group(1)) | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |                         elif pentry.text and entry.outputname: | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                             # value supplied by conversion spec: | 
					
						
							|  |  |  |  |                             self.dump_attr(pentry, pentry.text) | 
					
						
							|  |  |  |  |                         else: | 
					
						
							|  |  |  |  |                             m = _parameter_rx.match(line) | 
					
						
							|  |  |  |  |                             if not m: | 
					
						
							|  |  |  |  |                                 raise LaTeXFormatError( | 
					
						
							|  |  |  |  |                                     "could not extract parameter %s for %s: %s" | 
					
						
							|  |  |  |  |                                     % (pentry.name, macroname, `line[:100]`)) | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |                             if entry.outputname: | 
					
						
							|  |  |  |  |                                 self.dump_attr(pentry, m.group(1)) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                             line = line[m.end():] | 
					
						
							|  |  |  |  |                     elif pentry.type == "child": | 
					
						
							|  |  |  |  |                         if pentry.optional: | 
					
						
							|  |  |  |  |                             m = _optional_rx.match(line) | 
					
						
							|  |  |  |  |                             if m: | 
					
						
							|  |  |  |  |                                 line = line[m.end():] | 
					
						
							|  |  |  |  |                                 if entry.outputname and not opened: | 
					
						
							|  |  |  |  |                                     opened = 1 | 
					
						
							|  |  |  |  |                                     self.write("(%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                                     stack.append(macroname) | 
					
						
							|  |  |  |  |                                 stack.append(pentry.name) | 
					
						
							|  |  |  |  |                                 self.write("(%s\n" % pentry.name) | 
					
						
							|  |  |  |  |                                 self.write("-%s\n" % encode(m.group(1))) | 
					
						
							|  |  |  |  |                                 self.write(")%s\n" % pentry.name) | 
					
						
							|  |  |  |  |                                 stack.pop() | 
					
						
							|  |  |  |  |                         else: | 
					
						
							|  |  |  |  |                             if entry.outputname and not opened: | 
					
						
							|  |  |  |  |                                 opened = 1 | 
					
						
							|  |  |  |  |                                 self.write("(%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                                 stack.append(entry.name) | 
					
						
							|  |  |  |  |                             self.write("(%s\n" % pentry.name) | 
					
						
							|  |  |  |  |                             stack.append(pentry.name) | 
					
						
							|  |  |  |  |                             self.line = skip_white(line)[1:] | 
					
						
							|  |  |  |  |                             line = self.subconvert( | 
					
						
							|  |  |  |  |                                 "}", len(stack) + depth + 1)[1:] | 
					
						
							|  |  |  |  |                             self.write(")%s\n" % stack.pop()) | 
					
						
							|  |  |  |  |                     elif pentry.type == "content": | 
					
						
							|  |  |  |  |                         if pentry.implied: | 
					
						
							|  |  |  |  |                             implied_content = 1 | 
					
						
							|  |  |  |  |                         else: | 
					
						
							|  |  |  |  |                             if entry.outputname and not opened: | 
					
						
							|  |  |  |  |                                 opened = 1 | 
					
						
							|  |  |  |  |                                 self.write("(%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                                 stack.append(entry.name) | 
					
						
							|  |  |  |  |                             line = skip_white(line) | 
					
						
							|  |  |  |  |                             if line[0] != "{": | 
					
						
							|  |  |  |  |                                 raise LaTeXFormatError( | 
					
						
							|  |  |  |  |                                     "missing content for " + macroname) | 
					
						
							|  |  |  |  |                             self.line = line[1:] | 
					
						
							|  |  |  |  |                             line = self.subconvert("}", len(stack) + depth + 1) | 
					
						
							|  |  |  |  |                             if line and line[0] == "}": | 
					
						
							|  |  |  |  |                                 line = line[1:] | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |                     elif pentry.type == "text" and pentry.text: | 
					
						
							|  |  |  |  |                         if entry.outputname and not opened: | 
					
						
							|  |  |  |  |                             opened = 1 | 
					
						
							|  |  |  |  |                             stack.append(entry.name) | 
					
						
							|  |  |  |  |                             self.write("(%s\n" % entry.outputname) | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  |                         #dbgmsg("--- text: %s" % `pentry.text`) | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |                         self.write("-%s\n" % encode(pentry.text)) | 
					
						
							| 
									
										
										
										
											1999-08-26 17:54:16 +00:00
										 |  |  |  |                     elif pentry.type == "entityref": | 
					
						
							|  |  |  |  |                         self.write("&%s\n" % pentry.name) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                 if entry.outputname: | 
					
						
							|  |  |  |  |                     if not opened: | 
					
						
							|  |  |  |  |                         self.write("(%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                         stack.append(entry.name) | 
					
						
							|  |  |  |  |                     if not implied_content: | 
					
						
							|  |  |  |  |                         self.write(")%s\n" % entry.outputname) | 
					
						
							|  |  |  |  |                         stack.pop() | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             if line[0] == endchar and not stack: | 
					
						
							|  |  |  |  |                 self.line = line[1:] | 
					
						
							|  |  |  |  |                 return self.line | 
					
						
							|  |  |  |  |             if line[0] == "}": | 
					
						
							|  |  |  |  |                 # end of macro or group | 
					
						
							|  |  |  |  |                 macroname = stack[-1] | 
					
						
							|  |  |  |  |                 if macroname: | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  |                     conversion = self.table[macroname] | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |                     if conversion.outputname: | 
					
						
							|  |  |  |  |                         # otherwise, it was just a bare group | 
					
						
							|  |  |  |  |                         self.write(")%s\n" % conversion.outputname) | 
					
						
							|  |  |  |  |                 del stack[-1] | 
					
						
							|  |  |  |  |                 line = line[1:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2000-11-22 17:56:43 +00:00
										 |  |  |  |             if line[0] == "~": | 
					
						
							|  |  |  |  |                 # don't worry about the "tie" aspect of this command | 
					
						
							|  |  |  |  |                 line = line[1:] | 
					
						
							|  |  |  |  |                 self.write("- \n") | 
					
						
							|  |  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |             if line[0] == "{": | 
					
						
							|  |  |  |  |                 stack.append("") | 
					
						
							|  |  |  |  |                 line = line[1:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             if line[0] == "\\" and line[1] in ESCAPED_CHARS: | 
					
						
							|  |  |  |  |                 self.write("-%s\n" % encode(line[1])) | 
					
						
							|  |  |  |  |                 line = line[2:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             if line[:2] == r"\\": | 
					
						
							|  |  |  |  |                 self.write("(BREAK\n)BREAK\n") | 
					
						
							|  |  |  |  |                 line = line[2:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2000-11-22 17:56:43 +00:00
										 |  |  |  |             if line[:2] == r"\_": | 
					
						
							|  |  |  |  |                 line = "_" + line[2:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             if line[:2] in (r"\'", r'\"'): | 
					
						
							|  |  |  |  |                 # combining characters... | 
					
						
							|  |  |  |  |                 self.combining_char(line[1], line[2]) | 
					
						
							|  |  |  |  |                 line = line[3:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |             m = _text_rx.match(line) | 
					
						
							|  |  |  |  |             if m: | 
					
						
							|  |  |  |  |                 text = encode(m.group()) | 
					
						
							|  |  |  |  |                 self.write("-%s\n" % text) | 
					
						
							|  |  |  |  |                 line = line[m.end():] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             # special case because of \item[] | 
					
						
							|  |  |  |  |             # XXX can we axe this??? | 
					
						
							|  |  |  |  |             if line[0] == "]": | 
					
						
							|  |  |  |  |                 self.write("-]\n") | 
					
						
							|  |  |  |  |                 line = line[1:] | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             # avoid infinite loops | 
					
						
							|  |  |  |  |             extra = "" | 
					
						
							|  |  |  |  |             if len(line) > 100: | 
					
						
							|  |  |  |  |                 extra = "..." | 
					
						
							|  |  |  |  |             raise LaTeXFormatError("could not identify markup: %s%s" | 
					
						
							|  |  |  |  |                                    % (`line[:100]`, extra)) | 
					
						
							|  |  |  |  |         while stack: | 
					
						
							|  |  |  |  |             entry = self.get_entry(stack[-1]) | 
					
						
							|  |  |  |  |             if entry.closes: | 
					
						
							|  |  |  |  |                 self.write(")%s\n-%s\n" % (entry.outputname, encode("\n"))) | 
					
						
							|  |  |  |  |                 del stack[-1] | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 break | 
					
						
							|  |  |  |  |         if stack: | 
					
						
							|  |  |  |  |             raise LaTeXFormatError("elements remain on stack: " | 
					
						
							|  |  |  |  |                                    + string.join(stack, ", ")) | 
					
						
							|  |  |  |  |         # otherwise we just ran out of input here... | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-11-22 17:56:43 +00:00
										 |  |  |  |     # This is a really limited table of combinations, but it will have | 
					
						
							|  |  |  |  |     # to do for now. | 
					
						
							|  |  |  |  |     _combinations = { | 
					
						
							|  |  |  |  |         ("c", "c"): 0x00E7, | 
					
						
							|  |  |  |  |         ("'", "e"): 0x00E9, | 
					
						
							|  |  |  |  |         ('"', "o"): 0x00F6, | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def combining_char(self, prefix, char): | 
					
						
							|  |  |  |  |         ordinal = self._combinations[(prefix, char)] | 
					
						
							|  |  |  |  |         self.write("-\\%%%d;\n" % ordinal) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     def start_macro(self, name): | 
					
						
							|  |  |  |  |         conversion = self.get_entry(name) | 
					
						
							|  |  |  |  |         parameters = conversion.parameters | 
					
						
							|  |  |  |  |         optional = parameters and parameters[0].optional | 
					
						
							| 
									
										
										
										
											2001-09-25 20:57:36 +00:00
										 |  |  |  |         return parameters, optional, conversion.empty | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def get_entry(self, name): | 
					
						
							|  |  |  |  |         entry = self.table.get(name) | 
					
						
							|  |  |  |  |         if entry is None: | 
					
						
							| 
									
										
										
										
											2001-03-23 16:53:34 +00:00
										 |  |  |  |             dbgmsg("get_entry(%s) failing; building default entry!" % `name`) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |             # not defined; build a default entry: | 
					
						
							|  |  |  |  |             entry = TableEntry(name) | 
					
						
							|  |  |  |  |             entry.has_content = 1 | 
					
						
							|  |  |  |  |             entry.parameters.append(Parameter("content")) | 
					
						
							|  |  |  |  |             self.table[name] = entry | 
					
						
							|  |  |  |  |         return entry | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def get_env_entry(self, name): | 
					
						
							|  |  |  |  |         entry = self.table.get(name) | 
					
						
							|  |  |  |  |         if entry is None: | 
					
						
							|  |  |  |  |             # not defined; build a default entry: | 
					
						
							|  |  |  |  |             entry = TableEntry(name, 1) | 
					
						
							|  |  |  |  |             entry.has_content = 1 | 
					
						
							|  |  |  |  |             entry.parameters.append(Parameter("content")) | 
					
						
							|  |  |  |  |             entry.parameters[-1].implied = 1 | 
					
						
							|  |  |  |  |             self.table[name] = entry | 
					
						
							|  |  |  |  |         elif not entry.environment: | 
					
						
							|  |  |  |  |             raise LaTeXFormatError( | 
					
						
							|  |  |  |  |                 name + " is defined as a macro; expected environment") | 
					
						
							|  |  |  |  |         return entry | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def dump_attr(self, pentry, value): | 
					
						
							|  |  |  |  |         if not (pentry.name and value): | 
					
						
							|  |  |  |  |             return | 
					
						
							|  |  |  |  |         if _token_rx.match(value): | 
					
						
							|  |  |  |  |             dtype = "TOKEN" | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             dtype = "CDATA" | 
					
						
							|  |  |  |  |         self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value))) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:42:27 +00:00
										 |  |  |  | def convert(ifp, ofp, table): | 
					
						
							|  |  |  |  |     c = Conversion(ifp, ofp, table) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     try: | 
					
						
							|  |  |  |  |         c.convert() | 
					
						
							|  |  |  |  |     except IOError, (err, msg): | 
					
						
							|  |  |  |  |         if err != errno.EPIPE: | 
					
						
							|  |  |  |  |             raise | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
										 |  |  |  | def skip_white(line): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     while line and line[0] in " %\n\t\r": | 
					
						
							| 
									
										
										
										
											2001-09-28 16:26:13 +00:00
										 |  |  |  |         line = line[1:].lstrip() | 
					
						
							| 
									
										
										
										
											1999-01-14 17:38:12 +00:00
										 |  |  |  |     return line | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | class TableEntry: | 
					
						
							|  |  |  |  |     def __init__(self, name, environment=0): | 
					
						
							|  |  |  |  |         self.name = name | 
					
						
							|  |  |  |  |         self.outputname = name | 
					
						
							|  |  |  |  |         self.environment = environment | 
					
						
							|  |  |  |  |         self.empty = not environment | 
					
						
							|  |  |  |  |         self.has_content = 0 | 
					
						
							|  |  |  |  |         self.verbatim = 0 | 
					
						
							|  |  |  |  |         self.auto_close = 0 | 
					
						
							|  |  |  |  |         self.parameters = [] | 
					
						
							|  |  |  |  |         self.closes = [] | 
					
						
							|  |  |  |  |         self.endcloses = [] | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class Parameter: | 
					
						
							|  |  |  |  |     def __init__(self, type, name=None, optional=0): | 
					
						
							|  |  |  |  |         self.type = type | 
					
						
							|  |  |  |  |         self.name = name | 
					
						
							|  |  |  |  |         self.optional = optional | 
					
						
							|  |  |  |  |         self.text = '' | 
					
						
							|  |  |  |  |         self.implied = 0 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class TableParser(XMLParser): | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |     def __init__(self, table=None): | 
					
						
							|  |  |  |  |         if table is None: | 
					
						
							|  |  |  |  |             table = {} | 
					
						
							|  |  |  |  |         self.__table = table | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |         self.__current = None | 
					
						
							|  |  |  |  |         self.__buffer = '' | 
					
						
							|  |  |  |  |         XMLParser.__init__(self) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def get_table(self): | 
					
						
							|  |  |  |  |         for entry in self.__table.values(): | 
					
						
							|  |  |  |  |             if entry.environment and not entry.has_content: | 
					
						
							|  |  |  |  |                 p = Parameter("content") | 
					
						
							|  |  |  |  |                 p.implied = 1 | 
					
						
							|  |  |  |  |                 entry.parameters.append(p) | 
					
						
							|  |  |  |  |                 entry.has_content = 1 | 
					
						
							|  |  |  |  |         return self.__table | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def start_environment(self, attrs): | 
					
						
							|  |  |  |  |         name = attrs["name"] | 
					
						
							|  |  |  |  |         self.__current = TableEntry(name, environment=1) | 
					
						
							|  |  |  |  |         self.__current.verbatim = attrs.get("verbatim") == "yes" | 
					
						
							|  |  |  |  |         if attrs.has_key("outputname"): | 
					
						
							|  |  |  |  |             self.__current.outputname = attrs.get("outputname") | 
					
						
							| 
									
										
										
										
											2001-09-28 16:26:13 +00:00
										 |  |  |  |         self.__current.endcloses = attrs.get("endcloses", "").split() | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     def end_environment(self): | 
					
						
							|  |  |  |  |         self.end_macro() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def start_macro(self, attrs): | 
					
						
							|  |  |  |  |         name = attrs["name"] | 
					
						
							|  |  |  |  |         self.__current = TableEntry(name) | 
					
						
							| 
									
										
										
										
											2001-09-28 16:26:13 +00:00
										 |  |  |  |         self.__current.closes = attrs.get("closes", "").split() | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |         if attrs.has_key("outputname"): | 
					
						
							|  |  |  |  |             self.__current.outputname = attrs.get("outputname") | 
					
						
							|  |  |  |  |     def end_macro(self): | 
					
						
							|  |  |  |  |         self.__table[self.__current.name] = self.__current | 
					
						
							|  |  |  |  |         self.__current = None | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def start_attribute(self, attrs): | 
					
						
							|  |  |  |  |         name = attrs.get("name") | 
					
						
							|  |  |  |  |         optional = attrs.get("optional") == "yes" | 
					
						
							|  |  |  |  |         if name: | 
					
						
							|  |  |  |  |             p = Parameter("attribute", name, optional=optional) | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             p = Parameter("attribute", optional=optional) | 
					
						
							|  |  |  |  |         self.__current.parameters.append(p) | 
					
						
							|  |  |  |  |         self.__buffer = '' | 
					
						
							|  |  |  |  |     def end_attribute(self): | 
					
						
							|  |  |  |  |         self.__current.parameters[-1].text = self.__buffer | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-26 17:54:16 +00:00
										 |  |  |  |     def start_entityref(self, attrs): | 
					
						
							|  |  |  |  |         name = attrs["name"] | 
					
						
							|  |  |  |  |         p = Parameter("entityref", name) | 
					
						
							|  |  |  |  |         self.__current.parameters.append(p) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     def start_child(self, attrs): | 
					
						
							|  |  |  |  |         name = attrs["name"] | 
					
						
							|  |  |  |  |         p = Parameter("child", name, attrs.get("optional") == "yes") | 
					
						
							|  |  |  |  |         self.__current.parameters.append(p) | 
					
						
							|  |  |  |  |         self.__current.empty = 0 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def start_content(self, attrs): | 
					
						
							|  |  |  |  |         p = Parameter("content") | 
					
						
							|  |  |  |  |         p.implied = attrs.get("implied") == "yes" | 
					
						
							|  |  |  |  |         if self.__current.environment: | 
					
						
							|  |  |  |  |             p.implied = 1 | 
					
						
							|  |  |  |  |         self.__current.parameters.append(p) | 
					
						
							|  |  |  |  |         self.__current.has_content = 1 | 
					
						
							|  |  |  |  |         self.__current.empty = 0 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def start_text(self, attrs): | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  |         self.__current.empty = 0 | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |         self.__buffer = '' | 
					
						
							|  |  |  |  |     def end_text(self): | 
					
						
							|  |  |  |  |         p = Parameter("text") | 
					
						
							|  |  |  |  |         p.text = self.__buffer | 
					
						
							|  |  |  |  |         self.__current.parameters.append(p) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def handle_data(self, data): | 
					
						
							|  |  |  |  |         self.__buffer = self.__buffer + data | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-02 14:35:25 +00:00
										 |  |  |  | def load_table(fp, table=None): | 
					
						
							|  |  |  |  |     parser = TableParser(table=table) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     parser.feed(fp.read()) | 
					
						
							|  |  |  |  |     parser.close() | 
					
						
							|  |  |  |  |     return parser.get_table() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | def main(): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     global DEBUG | 
					
						
							|  |  |  |  |     # | 
					
						
							| 
									
										
										
										
											1999-07-29 22:42:27 +00:00
										 |  |  |  |     opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"]) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     for opt, arg in opts: | 
					
						
							| 
									
										
										
										
											1999-07-29 22:42:27 +00:00
										 |  |  |  |         if opt in ("-D", "--debug"): | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |             DEBUG = DEBUG + 1 | 
					
						
							|  |  |  |  |     if len(args) == 0: | 
					
						
							|  |  |  |  |         ifp = sys.stdin | 
					
						
							|  |  |  |  |         ofp = sys.stdout | 
					
						
							|  |  |  |  |     elif len(args) == 1: | 
					
						
							|  |  |  |  |         ifp = open(args) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  |         ofp = sys.stdout | 
					
						
							| 
									
										
										
										
											1999-07-29 22:22:13 +00:00
										 |  |  |  |     elif len(args) == 2: | 
					
						
							|  |  |  |  |         ifp = open(args[0]) | 
					
						
							|  |  |  |  |         ofp = open(args[1], "w") | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  |     else: | 
					
						
							|  |  |  |  |         usage() | 
					
						
							|  |  |  |  |         sys.exit(2) | 
					
						
							| 
									
										
										
										
											1999-07-29 22:42:27 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     table = load_table(open(os.path.join(sys.path[0], 'conversion.xml'))) | 
					
						
							|  |  |  |  |     convert(ifp, ofp, table) | 
					
						
							| 
									
										
										
										
											1998-11-23 16:59:39 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |  |     main() |