| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    Written by Raymond D. Hettinger <python at rcn.com> | 
					
						
							|  |  |  |    Copyright (c) 2003 Python Software Foundation.  All rights reserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Designed to catch common markup errors including: | 
					
						
							|  |  |  | * Unbalanced or mismatched parenthesis, brackets, and braces. | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  | * Unbalanced or mismatched \\begin and \\end blocks. | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | * Misspelled or invalid LaTeX commands. | 
					
						
							|  |  |  | * Use of forward slashes instead of backslashes for commands. | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  | * Table line size mismatches. | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  | Sample command line usage: | 
					
						
							|  |  |  |     python texcheck.py -k chapterheading -m lib/librandomtex *.tex | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Options: | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  |     -m          Munge parenthesis and brackets. [0,n) would normally mismatch. | 
					
						
							|  |  |  |     -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     -d:         Delimiter check only (useful for non-LaTeX files). | 
					
						
							|  |  |  |     -h:         Help | 
					
						
							|  |  |  |     -s lineno:  Start at lineno (useful for skipping complex sections). | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |     -v:         Verbose.  Trace the matching of \\begin and \\end blocks. | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | import sets | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import getopt | 
					
						
							|  |  |  | from itertools import izip, count, islice | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  | import glob | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | cmdstr = r"""
 | 
					
						
							|  |  |  |     \section \module \declaremodule \modulesynopsis \moduleauthor | 
					
						
							|  |  |  |     \sectionauthor \versionadded \code \class \method \begin | 
					
						
							|  |  |  |     \optional \var \ref \end \subsection \lineiii \hline \label | 
					
						
							|  |  |  |     \indexii \textrm \ldots \keyword \stindex \index \item \note | 
					
						
							|  |  |  |     \withsubitem \ttindex \footnote \citetitle \samp \opindex | 
					
						
							|  |  |  |     \noindent \exception \strong \dfn \ctype \obindex \character | 
					
						
							|  |  |  |     \indexiii \function \bifuncindex \refmodule \refbimodindex | 
					
						
							|  |  |  |     \subsubsection \nodename \member \chapter \emph \ASCII \UNIX | 
					
						
							|  |  |  |     \regexp \program \production \token \productioncont \term | 
					
						
							|  |  |  |     \grammartoken \lineii \seemodule \file \EOF \documentclass | 
					
						
							|  |  |  |     \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp | 
					
						
							|  |  |  |     \tableofcontents \kbd \programopt \envvar \refstmodindex | 
					
						
							|  |  |  |     \cfunction \constant \NULL \moreargs \cfuncline \cdata | 
					
						
							|  |  |  |     \textasciicircum \n \ABC \setindexsubitem \versionchanged | 
					
						
							|  |  |  |     \deprecated \seetext \newcommand \POSIX \pep \warning \rfc | 
					
						
							|  |  |  |     \verbatiminput \methodline \textgreater \seetitle \lineiv | 
					
						
							|  |  |  |     \funclineni \ulink \manpage \funcline \dataline \unspecified | 
					
						
							|  |  |  |     \textbackslash \mimetype \mailheader \seepep \textunderscore | 
					
						
							|  |  |  |     \longprogramopt \infinity \plusminus \shortversion \version | 
					
						
							|  |  |  |     \refmodindex \seerfc \makeindex \makemodindex \renewcommand | 
					
						
							| 
									
										
										
										
											2003-05-10 07:41:55 +00:00
										 |  |  |     \indexname \appendix \protect \indexiv \mbox \textasciitilde | 
					
						
							|  |  |  |     \platform \seeurl \leftmargin \labelwidth \localmoduletable | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  |     \LaTeX \copyright \memberline \backslash \pi \centerline | 
					
						
							|  |  |  |     \caption \vspace \textwidth \menuselection \textless | 
					
						
							|  |  |  |     \makevar \csimplemacro \menuselection \bfcode \sub \release | 
					
						
							|  |  |  |     \email \kwindex \refexmodindex \filenq \e \menuselection | 
					
						
							|  |  |  |     \exindex \linev \newsgroup \verbatim \setshortversion | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def matchclose(c_lineno, c_symbol, openers, pairmap): | 
					
						
							|  |  |  |     "Verify that closing delimiter matches most recent opening delimiter" | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         o_lineno, o_symbol = openers.pop() | 
					
						
							|  |  |  |     except IndexError: | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |         print "\nDelimiter mismatch.  On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol) | 
					
						
							|  |  |  |         return | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     if o_symbol in pairmap.get(c_symbol, [c_symbol]): return | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |     print "\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno) | 
					
						
							|  |  |  |     return | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def checkit(source, opts, morecmds=[]): | 
					
						
							| 
									
										
										
										
											2003-05-10 07:41:55 +00:00
										 |  |  |     """Check the LaTeX formatting in a sequence of lines.
 | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Opts is a mapping of options to option values if any: | 
					
						
							|  |  |  |         -m          munge parenthesis and brackets | 
					
						
							|  |  |  |         -d          delimiters only checking | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |         -v          verbose trace of delimiter matching | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |         -s lineno:  linenumber to start scan (default is 1). | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-10 07:41:55 +00:00
										 |  |  |     Morecmds is a sequence of LaTeX commands (without backslashes) that | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     are to be considered valid in the scan. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     texcmd = re.compile(r'\\[A-Za-z]+') | 
					
						
							| 
									
										
										
										
											2003-05-14 18:15:55 +00:00
										 |  |  |     falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     validcmds = sets.Set(cmdstr.split()) | 
					
						
							|  |  |  |     for cmd in morecmds: | 
					
						
							|  |  |  |         validcmds.add('\\' + cmd) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if '-m' in opts: | 
					
						
							|  |  |  |         pairmap = {']':'[(', ')':'(['}      # Munged openers | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         pairmap = {']':'[', ')':'('}        # Normal opener for a given closer | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |     openpunct = sets.Set('([')              # Set of valid openers | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])') | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  |     braces = re.compile(r'({)|(})') | 
					
						
							| 
									
										
										
										
											2003-08-25 04:39:55 +00:00
										 |  |  |     doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b') | 
					
						
							| 
									
										
										
										
											2003-09-08 18:43:46 +00:00
										 |  |  |     spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s') | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     openers = []                            # Stack of pending open delimiters | 
					
						
							|  |  |  |     bracestack = []                         # Stack of pending open braces | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-10 07:41:55 +00:00
										 |  |  |     tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}') | 
					
						
							|  |  |  |     tableline = re.compile(r'\\line([iv]+){') | 
					
						
							|  |  |  |     tableend = re.compile(r'\\end{(?:long)?table([iv]+)}') | 
					
						
							|  |  |  |     tablelevel = '' | 
					
						
							|  |  |  |     tablestartline = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     startline = int(opts.get('-s', '1')) | 
					
						
							|  |  |  |     lineno = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for lineno, line in izip(count(startline), islice(source, startline-1, None)): | 
					
						
							|  |  |  |         line = line.rstrip() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |         # Check balancing of open/close parenthesis, brackets, and begin/end blocks | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |         for begend, name, punct in delimiters.findall(line): | 
					
						
							|  |  |  |             if '-v' in opts: | 
					
						
							|  |  |  |                 print lineno, '|', begend, name, punct, | 
					
						
							|  |  |  |             if begend == 'begin' and '-d' not in opts: | 
					
						
							|  |  |  |                 openers.append((lineno, name)) | 
					
						
							|  |  |  |             elif punct in openpunct: | 
					
						
							|  |  |  |                 openers.append((lineno, punct)) | 
					
						
							|  |  |  |             elif begend == 'end' and '-d' not in opts: | 
					
						
							|  |  |  |                 matchclose(lineno, name, openers, pairmap) | 
					
						
							|  |  |  |             elif punct in pairmap: | 
					
						
							|  |  |  |                 matchclose(lineno, punct, openers, pairmap) | 
					
						
							|  |  |  |             if '-v' in opts: | 
					
						
							|  |  |  |                 print '   --> ', openers | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  |         # Balance opening and closing braces | 
					
						
							|  |  |  |         for open, close in braces.findall(line): | 
					
						
							|  |  |  |             if open == '{': | 
					
						
							|  |  |  |                 bracestack.append(lineno) | 
					
						
							|  |  |  |             if close == '}': | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     bracestack.pop() | 
					
						
							|  |  |  |                 except IndexError: | 
					
						
							|  |  |  |                     print r'Warning, unmatched } on line %s.' % (lineno,) | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Optionally, skip LaTeX specific checks | 
					
						
							|  |  |  |         if '-d' in opts: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Warn whenever forward slashes encountered with a LaTeX command | 
					
						
							|  |  |  |         for cmd in falsetexcmd.findall(line): | 
					
						
							|  |  |  |             if '822' in line or '.html' in line: | 
					
						
							|  |  |  |                 continue    # Ignore false positives for urls and for /rfc822 | 
					
						
							|  |  |  |             if '\\' + cmd in validcmds: | 
					
						
							|  |  |  |                 print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-09-08 18:43:46 +00:00
										 |  |  |         # Check for markup requiring {} for correct spacing | 
					
						
							|  |  |  |         for cmd in spacingmarkup.findall(line): | 
					
						
							|  |  |  |             print r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno) | 
					
						
							| 
									
										
										
										
											2003-09-08 17:33:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |         # Validate commands | 
					
						
							|  |  |  |         nc = line.find(r'\newcommand') | 
					
						
							|  |  |  |         if nc != -1: | 
					
						
							|  |  |  |             start = line.find('{', nc) | 
					
						
							|  |  |  |             end = line.find('}', start) | 
					
						
							|  |  |  |             validcmds.add(line[start+1:end]) | 
					
						
							|  |  |  |         for cmd in texcmd.findall(line): | 
					
						
							|  |  |  |             if cmd not in validcmds: | 
					
						
							|  |  |  |                 print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd) | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-10 09:04:37 +00:00
										 |  |  |         # Check table levels (make sure lineii only inside tableii) | 
					
						
							| 
									
										
										
										
											2003-05-10 07:41:55 +00:00
										 |  |  |         m = tablestart.search(line) | 
					
						
							|  |  |  |         if m: | 
					
						
							|  |  |  |             tablelevel = m.group(1) | 
					
						
							|  |  |  |             tablestartline = lineno | 
					
						
							|  |  |  |         m = tableline.search(line) | 
					
						
							|  |  |  |         if m and m.group(1) != tablelevel: | 
					
						
							|  |  |  |             print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline) | 
					
						
							|  |  |  |         if tableend.search(line): | 
					
						
							|  |  |  |             tablelevel = '' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |         # Style guide warnings | 
					
						
							|  |  |  |         if 'e.g.' in line or 'i.e.' in line: | 
					
						
							|  |  |  |             print r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-08-25 04:39:55 +00:00
										 |  |  |         for dw in doubledwords.findall(line): | 
					
						
							|  |  |  |             print r'Doubled word warning.  "%s" on line %d' % (dw, lineno) | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-10 09:04:37 +00:00
										 |  |  |     lastline = lineno | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     for lineno, symbol in openers: | 
					
						
							| 
									
										
										
										
											2003-05-10 07:41:55 +00:00
										 |  |  |         print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno) | 
					
						
							| 
									
										
										
										
											2003-05-12 23:33:28 +00:00
										 |  |  |     for lineno in bracestack: | 
					
						
							|  |  |  |         print "Unmatched { on line %d" % (lineno,) | 
					
						
							| 
									
										
										
										
											2003-05-10 09:04:37 +00:00
										 |  |  |     print 'Done checking %d lines.' % (lastline,) | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     return 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(args=None): | 
					
						
							|  |  |  |     if args is None: | 
					
						
							|  |  |  |         args = sys.argv[1:] | 
					
						
							| 
									
										
										
										
											2003-05-14 18:15:55 +00:00
										 |  |  |     optitems, arglist = getopt.getopt(args, "k:mdhs:v") | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  |     opts = dict(optitems) | 
					
						
							|  |  |  |     if '-h' in opts or args==[]: | 
					
						
							|  |  |  |         print __doc__ | 
					
						
							|  |  |  |         return 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if len(arglist) < 1: | 
					
						
							|  |  |  |         print 'Please specify a file to be checked' | 
					
						
							|  |  |  |         return 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |     for i, filespec in enumerate(arglist): | 
					
						
							|  |  |  |         if '*' in filespec or '?' in filespec: | 
					
						
							|  |  |  |             arglist[i:i+1] = glob.glob(filespec) | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-05-16 03:06:39 +00:00
										 |  |  |     morecmds = [v for k,v in optitems if k=='-k'] | 
					
						
							|  |  |  |     err = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for filename in arglist: | 
					
						
							|  |  |  |         print '=' * 30 | 
					
						
							|  |  |  |         print "Checking", filename | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             f = open(filename) | 
					
						
							|  |  |  |         except IOError: | 
					
						
							|  |  |  |             print 'Cannot open file %s.' % arglist[0] | 
					
						
							|  |  |  |             return 2 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             err.append(checkit(f, opts, morecmds)) | 
					
						
							|  |  |  |         finally: | 
					
						
							|  |  |  |             f.close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return max(err) | 
					
						
							| 
									
										
										
										
											2003-05-10 03:30:13 +00:00
										 |  |  | 
 | 
					
						
# When run as a script, check the files named on the command line and
# use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
					
						
							|  |  |  | 
 |