mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 11:14:33 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			233 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			233 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
 | |
| 
 | |
|    Written by Raymond D. Hettinger <python at rcn.com>
 | |
|    Copyright (c) 2003 Python Software Foundation.  All rights reserved.
 | |
| 
 | |
| Designed to catch common markup errors including:
 | |
| * Unbalanced or mismatched parenthesis, brackets, and braces.
 | |
| * Unbalanced or mismatched \\begin and \\end blocks.
 | |
| * Misspelled or invalid LaTeX commands.
 | |
| * Use of forward slashes instead of backslashes for commands.
 | |
| * Table line size mismatches.
 | |
| 
 | |
| Sample command line usage:
 | |
|     python texcheck.py -k chapterheading -m lib/librandomtex *.tex
 | |
| 
 | |
| Options:
 | |
|     -m          Munge parenthesis and brackets. [0,n) would normally mismatch.
 | |
|     -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
 | |
|     -d:         Delimiter check only (useful for non-LaTeX files).
 | |
|     -h:         Help
 | |
|     -s lineno:  Start at lineno (useful for skipping complex sections).
 | |
|     -v:         Verbose.  Trace the matching of //begin and //end blocks.
 | |
| """
 | |
| 
 | |
| import re
 | |
| import sys
 | |
| import getopt
 | |
| from itertools import izip, count, islice
 | |
| import glob
 | |
| 
 | |
| cmdstr = r"""
 | |
|     \section \module \declaremodule \modulesynopsis \moduleauthor
 | |
|     \sectionauthor \versionadded \code \class \method \begin
 | |
|     \optional \var \ref \end \subsection \lineiii \hline \label
 | |
|     \indexii \textrm \ldots \keyword \stindex \index \item \note
 | |
|     \withsubitem \ttindex \footnote \citetitle \samp \opindex
 | |
|     \noindent \exception \strong \dfn \ctype \obindex \character
 | |
|     \indexiii \function \bifuncindex \refmodule \refbimodindex
 | |
|     \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
 | |
|     \regexp \program \production \token \productioncont \term
 | |
|     \grammartoken \lineii \seemodule \file \EOF \documentclass
 | |
|     \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
 | |
|     \tableofcontents \kbd \programopt \envvar \refstmodindex
 | |
|     \cfunction \constant \NULL \moreargs \cfuncline \cdata
 | |
|     \textasciicircum \n \ABC \setindexsubitem \versionchanged
 | |
|     \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
 | |
|     \verbatiminput \methodline \textgreater \seetitle \lineiv
 | |
|     \funclineni \ulink \manpage \funcline \dataline \unspecified
 | |
|     \textbackslash \mimetype \mailheader \seepep \textunderscore
 | |
|     \longprogramopt \infinity \plusminus \shortversion \version
 | |
|     \refmodindex \seerfc \makeindex \makemodindex \renewcommand
 | |
|     \indexname \appendix \protect \indexiv \mbox \textasciitilde
 | |
|     \platform \seeurl \leftmargin \labelwidth \localmoduletable
 | |
|     \LaTeX \copyright \memberline \backslash \pi \centerline
 | |
|     \caption \vspace \textwidth \menuselection \textless
 | |
|     \makevar \csimplemacro \menuselection \bfcode \sub \release
 | |
|     \email \kwindex \refexmodindex \filenq \e \menuselection
 | |
|     \exindex \linev \newsgroup \verbatim \setshortversion
 | |
|     \author \authoraddress \paragraph \subparagraph \cmemberline
 | |
|     \textbar \C \seelink
 | |
| """
 | |
| 
 | |
| def matchclose(c_lineno, c_symbol, openers, pairmap):
 | |
|     "Verify that closing delimiter matches most recent opening delimiter"
 | |
|     try:
 | |
|         o_lineno, o_symbol = openers.pop()
 | |
|     except IndexError:
 | |
|         print "\nDelimiter mismatch.  On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
 | |
|         return
 | |
|     if o_symbol in pairmap.get(c_symbol, [c_symbol]): return
 | |
|     print "\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
 | |
|     return
 | |
| 
 | |
| def checkit(source, opts, morecmds=[]):
 | |
|     """Check the LaTeX formatting in a sequence of lines.
 | |
| 
 | |
|     Opts is a mapping of options to option values if any:
 | |
|         -m          munge parenthesis and brackets
 | |
|         -d          delimiters only checking
 | |
|         -v          verbose trace of delimiter matching
 | |
|         -s lineno:  linenumber to start scan (default is 1).
 | |
| 
 | |
|     Morecmds is a sequence of LaTeX commands (without backslashes) that
 | |
|     are to be considered valid in the scan.
 | |
|     """
 | |
| 
 | |
|     texcmd = re.compile(r'\\[A-Za-z]+')
 | |
|     falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash
 | |
| 
 | |
|     validcmds = set(cmdstr.split())
 | |
|     for cmd in morecmds:
 | |
|         validcmds.add('\\' + cmd)
 | |
| 
 | |
|     if '-m' in opts:
 | |
|         pairmap = {']':'[(', ')':'(['}      # Munged openers
 | |
|     else:
 | |
|         pairmap = {']':'[', ')':'('}        # Normal opener for a given closer
 | |
|     openpunct = set('([')                   # Set of valid openers
 | |
| 
 | |
|     delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
 | |
|     braces = re.compile(r'({)|(})')
 | |
|     doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b')
 | |
|     spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')
 | |
| 
 | |
|     openers = []                            # Stack of pending open delimiters
 | |
|     bracestack = []                         # Stack of pending open braces
 | |
| 
 | |
|     tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
 | |
|     tableline = re.compile(r'\\line([iv]+){')
 | |
|     tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
 | |
|     tablelevel = ''
 | |
|     tablestartline = 0
 | |
| 
 | |
|     startline = int(opts.get('-s', '1'))
 | |
|     lineno = 0
 | |
| 
 | |
|     for lineno, line in izip(count(startline), islice(source, startline-1, None)):
 | |
|         line = line.rstrip()
 | |
| 
 | |
|         # Check balancing of open/close parenthesis, brackets, and begin/end blocks
 | |
|         for begend, name, punct in delimiters.findall(line):
 | |
|             if '-v' in opts:
 | |
|                 print lineno, '|', begend, name, punct,
 | |
|             if begend == 'begin' and '-d' not in opts:
 | |
|                 openers.append((lineno, name))
 | |
|             elif punct in openpunct:
 | |
|                 openers.append((lineno, punct))
 | |
|             elif begend == 'end' and '-d' not in opts:
 | |
|                 matchclose(lineno, name, openers, pairmap)
 | |
|             elif punct in pairmap:
 | |
|                 matchclose(lineno, punct, openers, pairmap)
 | |
|             if '-v' in opts:
 | |
|                 print '   --> ', openers
 | |
| 
 | |
|         # Balance opening and closing braces
 | |
|         for open, close in braces.findall(line):
 | |
|             if open == '{':
 | |
|                 bracestack.append(lineno)
 | |
|             if close == '}':
 | |
|                 try:
 | |
|                     bracestack.pop()
 | |
|                 except IndexError:
 | |
|                     print r'Warning, unmatched } on line %s.' % (lineno,)
 | |
| 
 | |
|         # Optionally, skip LaTeX specific checks
 | |
|         if '-d' in opts:
 | |
|             continue
 | |
| 
 | |
|         # Warn whenever forward slashes encountered with a LaTeX command
 | |
|         for cmd in falsetexcmd.findall(line):
 | |
|             if '822' in line or '.html' in line:
 | |
|                 continue    # Ignore false positives for urls and for /rfc822
 | |
|             if '\\' + cmd in validcmds:
 | |
|                 print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd)
 | |
| 
 | |
|         # Check for markup requiring {} for correct spacing
 | |
|         for cmd in spacingmarkup.findall(line):
 | |
|             print r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno)
 | |
| 
 | |
|         # Validate commands
 | |
|         nc = line.find(r'\newcommand')
 | |
|         if nc != -1:
 | |
|             start = line.find('{', nc)
 | |
|             end = line.find('}', start)
 | |
|             validcmds.add(line[start+1:end])
 | |
|         for cmd in texcmd.findall(line):
 | |
|             if cmd not in validcmds:
 | |
|                 print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd)
 | |
| 
 | |
|         # Check table levels (make sure lineii only inside tableii)
 | |
|         m = tablestart.search(line)
 | |
|         if m:
 | |
|             tablelevel = m.group(1)
 | |
|             tablestartline = lineno
 | |
|         m = tableline.search(line)
 | |
|         if m and m.group(1) != tablelevel:
 | |
|             print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline)
 | |
|         if tableend.search(line):
 | |
|             tablelevel = ''
 | |
| 
 | |
|         # Style guide warnings
 | |
|         if 'e.g.' in line or 'i.e.' in line:
 | |
|             print r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,)
 | |
| 
 | |
|         for dw in doubledwords.findall(line):
 | |
|             print r'Doubled word warning.  "%s" on line %d' % (dw, lineno)
 | |
| 
 | |
|     lastline = lineno
 | |
|     for lineno, symbol in openers:
 | |
|         print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno)
 | |
|     for lineno in bracestack:
 | |
|         print "Unmatched { on line %d" % (lineno,)
 | |
|     print 'Done checking %d lines.' % (lastline,)
 | |
|     return 0
 | |
| 
 | |
| def main(args=None):
 | |
|     if args is None:
 | |
|         args = sys.argv[1:]
 | |
|     optitems, arglist = getopt.getopt(args, "k:mdhs:v")
 | |
|     opts = dict(optitems)
 | |
|     if '-h' in opts or args==[]:
 | |
|         print __doc__
 | |
|         return 0
 | |
| 
 | |
|     if len(arglist) < 1:
 | |
|         print 'Please specify a file to be checked'
 | |
|         return 1
 | |
| 
 | |
|     for i, filespec in enumerate(arglist):
 | |
|         if '*' in filespec or '?' in filespec:
 | |
|             arglist[i:i+1] = glob.glob(filespec)
 | |
| 
 | |
|     morecmds = [v for k,v in optitems if k=='-k']
 | |
|     err = []
 | |
| 
 | |
|     for filename in arglist:
 | |
|         print '=' * 30
 | |
|         print "Checking", filename
 | |
|         try:
 | |
|             f = open(filename)
 | |
|         except IOError:
 | |
|             print 'Cannot open file %s.' % arglist[0]
 | |
|             return 2
 | |
| 
 | |
|         try:
 | |
|             err.append(checkit(f, opts, morecmds))
 | |
|         finally:
 | |
|             f.close()
 | |
| 
 | |
|     return max(err)
 | |
| 
 | |
| if __name__ == '__main__':
 | |
|     sys.exit(main())
 | 
