| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | #! /usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The output file has an extension of '.bkm' instead of '.out', since hyperref | 
					
						
							| 
									
										
										
										
											1998-10-07 14:12:20 +00:00
										 |  |  | already uses that extension. | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-07 15:34:50 +00:00
										 |  |  | import getopt | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | import os | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | import string | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Each entry in the table of contents is a tuple of: | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #   Section Type,  Section #,  Title String,  Page #,  List of Sub-entries | 
					
						
							| 
									
										
										
										
											1998-05-14 20:07:10 +00:00
										 |  |  | # | 
					
						
							|  |  |  | # The return value of parse_toc() is a list of such tuples. | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | 
 | 
					
						
# Pattern for a single "\contentsline" record of a .toc file.  It is compiled
# with re.VERBOSE below, so unescaped whitespace and the trailing "#" notes
# inside the pattern are ignored, and a literal space is written as "\ ".
# Capture groups: 1 = section type, 2 = section number (None when the record
# has no \numberline), 3 = raw title text, 4 = page number.
cline_re = r"""^
\\contentsline\ \{([a-z]*)}             # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})?     # section number
(.*)}                                   # title string
\{(\d+)}$"""                            # page number

# Compiled form used by parse_toc() to match each input line.
cline_rx = re.compile(cline_re, re.VERBOSE)
					
						
							|  |  |  | 
 | 
					
						
# Sentinel direction: descend into the sub-entry list of the entry that was
# added most recently.
OUTER_TO_INNER = -1

# Maps (current level, level of the next entry) to a direction.
# OUTER_TO_INNER means "go one level deeper"; a positive value is the number
# of nesting levels parse_toc() pops off its stack before appending the entry.
# Pairs that are not listed (for example chapter -> subsection) make
# parse_toc() raise KeyError.
_transition_map = {
    ('chapter', 'section'): OUTER_TO_INNER,
    ('section', 'subsection'): OUTER_TO_INNER,
    ('subsection', 'subsubsection'): OUTER_TO_INNER,
    ('subsubsection', 'subsection'): 1,
    ('subsection', 'section'): 1,
    ('section', 'chapter'): 1,
    ('subsection', 'chapter'): 2,
    ('subsubsection', 'section'): 2,
    ('subsubsection', 'chapter'): 3,
    }

# Section types that take part in level transitions.  parse_toc() skips an
# entry of any other type unless that type equals the current level.
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-07 15:34:50 +00:00
										 |  |  | def parse_toc(fp, bigpart=None): | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     toc = top = [] | 
					
						
							|  |  |  |     stack = [toc] | 
					
						
							| 
									
										
										
										
											1998-03-07 15:34:50 +00:00
										 |  |  |     level = bigpart or 'chapter' | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     lineno = 0 | 
					
						
							|  |  |  |     while 1: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         line = fp.readline() | 
					
						
							|  |  |  |         if not line: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         lineno = lineno + 1 | 
					
						
							|  |  |  |         m = cline_rx.match(line) | 
					
						
							|  |  |  |         if m: | 
					
						
							|  |  |  |             stype, snum, title, pageno = m.group(1, 2, 3, 4) | 
					
						
							|  |  |  |             title = clean_title(title) | 
					
						
							|  |  |  |             entry = (stype, snum, title, string.atoi(pageno), []) | 
					
						
							|  |  |  |             if stype == level: | 
					
						
							|  |  |  |                 toc.append(entry) | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											1998-10-07 14:12:20 +00:00
										 |  |  |                 if stype not in INCLUDED_LEVELS: | 
					
						
							|  |  |  |                     # we don't want paragraphs & subparagraphs | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |                 direction = _transition_map[(level, stype)] | 
					
						
							|  |  |  |                 if direction == OUTER_TO_INNER: | 
					
						
							|  |  |  |                     toc = toc[-1][-1] | 
					
						
							|  |  |  |                     stack.insert(0, toc) | 
					
						
							|  |  |  |                     toc.append(entry) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     for i in range(direction): | 
					
						
							|  |  |  |                         del stack[0] | 
					
						
							|  |  |  |                         toc = stack[0] | 
					
						
							|  |  |  |                     toc.append(entry) | 
					
						
							|  |  |  |                 level = stype | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             sys.stderr.write("l.%s: " + line) | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     return top | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-10 14:02:35 +00:00
										 |  |  | hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}") | 
					
						
							|  |  |  | raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}") | 
					
						
							|  |  |  | title_rx = re.compile(r"\\([a-zA-Z])+\s+") | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | title_trans = string.maketrans("", "") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def clean_title(title): | 
					
						
							| 
									
										
										
										
											1998-03-10 14:02:35 +00:00
										 |  |  |     title = raisebox_rx.sub("", title) | 
					
						
							|  |  |  |     title = hackscore_rx.sub(r"\\_", title) | 
					
						
							|  |  |  |     pos = 0 | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     while 1: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         m = title_rx.search(title, pos) | 
					
						
							|  |  |  |         if m: | 
					
						
							|  |  |  |             start = m.start() | 
					
						
							|  |  |  |             if title[start:start+15] != "\\textunderscore": | 
					
						
							|  |  |  |                 title = title[:start] + title[m.end():] | 
					
						
							|  |  |  |             pos = start + 1 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											1998-03-10 14:02:35 +00:00
										 |  |  |     title = string.translate(title, title_trans, "{}") | 
					
						
							|  |  |  |     return title | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def write_toc(toc, fp): | 
					
						
							|  |  |  |     for entry in toc: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         write_toc_entry(entry, fp, 0) | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def write_toc_entry(entry, fp, layer): | 
					
						
							|  |  |  |     stype, snum, title, pageno, toc = entry | 
					
						
							| 
									
										
										
										
											1998-04-15 17:50:22 +00:00
										 |  |  |     s = "\\pdfoutline goto name{page%03d}" % pageno | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     if toc: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         s = "%s count -%d" % (s, len(toc)) | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     if snum: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         title = "%s %s" % (snum, title) | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  |     s = "%s {%s}\n" % (s, title) | 
					
						
							|  |  |  |     fp.write(s) | 
					
						
							|  |  |  |     for entry in toc: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         write_toc_entry(entry, fp, layer + 1) | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-03-03 19:25:56 +00:00
										 |  |  | def process(ifn, ofn, bigpart=None): | 
					
						
							|  |  |  |     toc = parse_toc(open(ifn), bigpart) | 
					
						
							|  |  |  |     write_toc(toc, open(ofn, "w")) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | def main(): | 
					
						
							| 
									
										
										
										
											1998-03-07 15:34:50 +00:00
										 |  |  |     bigpart = None | 
					
						
							|  |  |  |     opts, args = getopt.getopt(sys.argv[1:], "c:") | 
					
						
							|  |  |  |     if opts: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         bigpart = opts[0][1] | 
					
						
							| 
									
										
										
										
											1998-03-07 15:34:50 +00:00
										 |  |  |     if not args: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         usage() | 
					
						
							|  |  |  |         sys.exit(2) | 
					
						
							| 
									
										
										
										
											1998-03-07 15:34:50 +00:00
										 |  |  |     for filename in args: | 
					
						
							| 
									
										
										
										
											2000-10-07 12:50:05 +00:00
										 |  |  |         base, ext = os.path.splitext(filename) | 
					
						
							|  |  |  |         ext = ext or ".toc" | 
					
						
							| 
									
										
										
										
											1999-03-03 19:25:56 +00:00
										 |  |  |         process(base + ext, base + ".bkm", bigpart) | 
					
						
							| 
									
										
										
										
											1998-03-06 21:29:00 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()