mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			260 lines
		
	
	
	
		
			8.9 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			260 lines
		
	
	
	
		
			8.9 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| '''Add syntax highlighting to Python source code'''
 | |
| 
 | |
| __author__ = 'Raymond Hettinger'
 | |
| 
 | |
import functools
import keyword
import re
import tokenize

try:
    # The cgi module was removed in Python 3.13; guard the import so the
    # script still starts on newer interpreters.
    import cgi
except ImportError:
    cgi = None

try:
    import builtins
except ImportError:
    import __builtin__ as builtins
 | |
| 
 | |
| #### Analyze Python Source #################################
 | |
| 
 | |
def is_builtin(s):
    """Report whether *s* is the name of an attribute of the builtins module."""
    missing = object()
    return getattr(builtins, s, missing) is not missing
 | |
| 
 | |
def combine_range(lines, start, end):
    """Return the text lying between two (row, col) positions.

    *lines* is a list of source lines; rows are 1-based and columns are
    0-based.  The result is a ``(text, end)`` pair so that callers can update
    their "written up to here" marker in the same assignment.
    """
    first_row, first_col = start
    last_row, last_col = end
    if first_row != last_row:
        # Tail of the first line, every whole line in between, head of the last.
        head = lines[first_row - 1][first_col:]
        middle = lines[first_row:last_row - 1]
        tail = lines[last_row - 1][:last_col]
        return ''.join([head] + middle + [tail]), end
    return lines[first_row - 1][first_col:last_col], end
 | |
| 
 | |
def analyze_python(source):
    """Tokenize Python source and yield it back as classified chunks.

    Yields ``(category, text)`` tuples.  The category is ``''`` for text that
    gets no highlighting, otherwise one of 'comment', 'operator', 'string',
    'docstring', 'definition', 'defname', 'keyword' or 'builtin'.
    """
    lines = source.splitlines(True)
    lines.append('')
    remaining = iter(lines)

    def readline():
        # Feed the tokenizer one line per call, then '' once exhausted.
        return next(remaining, '')

    punctuation = '{}[](),.:;@'
    prev_type, prev_text = tokenize.COMMENT, ''
    emitted_upto = (1, 0)
    for tok_type, tok_str, tok_start, tok_end, _line in tokenize.generate_tokens(readline):
        if tok_type == tokenize.COMMENT:
            kind = 'comment'
        elif tok_type == tokenize.OP:
            # Brackets, separators and '@' are left unhighlighted.
            kind = '' if tok_str[:1] in punctuation else 'operator'
        elif tok_type == tokenize.STRING:
            # Heuristic: a string right after an INDENT, or starting in
            # column 0, is treated as a docstring.
            looks_like_docstring = prev_type == tokenize.INDENT or tok_start[1] == 0
            kind = 'docstring' if looks_like_docstring else 'string'
        elif tok_type == tokenize.NAME:
            if tok_str in ('def', 'class', 'import', 'from'):
                kind = 'definition'
            elif prev_text in ('def', 'class'):
                kind = 'defname'
            elif keyword.iskeyword(tok_str):
                kind = 'keyword'
            elif prev_text != '.' and is_builtin(tok_str):
                # A builtin name used as an attribute (x.len) is not a builtin.
                kind = 'builtin'
            else:
                kind = ''
        else:
            kind = ''

        if kind:
            # First flush the unclassified text preceding this token ...
            gap, _ = combine_range(lines, emitted_upto, tok_start)
            yield '', gap
            # ... then the token itself.
            emitted_upto = tok_end
            yield kind, tok_str
        prev_type, prev_text = tok_type, tok_str

    # Whatever follows the last classified token, up to the final token's end.
    trailing, _ = combine_range(lines, emitted_upto, tok_end)
    yield '', trailing
 | |
| 
 | |
| #### Raw Output  ###########################################
 | |
| 
 | |
def raw_highlight(classified_text):
    """Render each (kind, text) pair as one plain-text line for inspection.

    The kind is right-aligned in 15 columns ('plain' when empty) and the text
    is shown via its repr.
    """
    return ''.join(
        '%15s:  %r\n' % (kind or 'plain', text)
        for kind, text in classified_text
    )
 | |
| 
 | |
| #### ANSI Output ###########################################
 | |
| 
 | |
# Maps each highlight category to an (opener, closer) pair of ANSI escape
# sequences.  Every closer is the reset sequence '\033[0m'.
default_ansi = dict(
    comment=('\033[0;31m', '\033[0m'),
    string=('\033[0;32m', '\033[0m'),
    docstring=('\033[0;32m', '\033[0m'),
    keyword=('\033[0;33m', '\033[0m'),
    builtin=('\033[0;35m', '\033[0m'),
    definition=('\033[0;33m', '\033[0m'),
    defname=('\033[0;34m', '\033[0m'),
    operator=('\033[0;33m', '\033[0m'),
)
 | |
| 
 | |
def ansi_highlight(classified_text, colors=default_ansi):
    """Colorize classified text for a terminal using ANSI escape sequences.

    *colors* maps a category name to an ``(opener, closer)`` pair; categories
    missing from the mapping are emitted without any escape codes.
    """
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    uncolored = ('', '')
    pieces = []
    for kind, text in classified_text:
        opener, closer = colors.get(kind, uncolored)
        pieces.extend((opener, text, closer))
    return ''.join(pieces)
 | |
| 
 | |
| #### HTML Output ###########################################
 | |
| 
 | |
def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    """Convert classified text to an HTML fragment.

    Each ``(kind, text)`` pair is HTML-escaped; pairs with a non-empty kind
    are wrapped in ``<span class="KIND">``.  The kind itself is inserted
    verbatim.  *opener* and *closer* are placed around the whole fragment.
    """
    # cgi.escape() was removed in Python 3.8 (and cgi itself in 3.13), so
    # prefer html.escape() and only fall back to cgi on interpreters that
    # lack the html module.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    result = [opener]
    for kind, text in classified_text:
        if kind:
            result.append('<span class="%s">' % kind)
        # quote=False keeps the output identical to the old cgi.escape(text).
        result.append(escape(text, quote=False))
        if kind:
            result.append('</span>')
    result.append(closer)
    return ''.join(result)
 | |
| 
 | |
# CSS rule bodies keyed by class selector; one selector per highlight category.
default_css = dict([
    ('.comment', '{color: crimson;}'),
    ('.string', '{color: forestgreen;}'),
    ('.docstring', '{color: forestgreen; font-style:italic;}'),
    ('.keyword', '{color: darkorange;}'),
    ('.builtin', '{color: purple;}'),
    ('.definition', '{color: darkorange; font-weight:bold;}'),
    ('.defname', '{color: blue;}'),
    ('.operator', '{color: brown;}'),
])
 | |
| 
 | |
# Page template for str.format(); it has the replacement fields {title},
# {css} and {body}.
default_html = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n'
    '          "http://www.w3.org/TR/html4/strict.dtd">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-type" content="text/html;charset=UTF-8">\n'
    '<title> {title} </title>\n'
    '<style type="text/css">\n'
    '{css}\n'
    '</style>\n'
    '</head>\n'
    '<body>\n'
    '{body}\n'
    '</body>\n'
    '</html>\n'
)
 | |
| 
 | |
def build_html_page(classified_text, title='python',
                    css=default_css, html=default_html):
    """Create a complete HTML page with colorized source code.

    *css* maps selectors to rule bodies and is rendered one rule per line.
    *html* is a ``str.format`` template with ``{title}``, ``{css}`` and
    ``{body}`` fields.  The title is HTML-escaped before insertion.
    """
    # cgi.escape() was removed in Python 3.8.  The function is imported by
    # name because the ``html`` parameter shadows the html module here.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    css_str = '\n'.join('%s %s' % item for item in css.items())
    result = html_highlight(classified_text)
    # quote=False keeps the output identical to the old cgi.escape(title).
    title = escape(title, quote=False)
    return html.format(title=title, css=css_str, body=result)
 | |
| 
 | |
| #### LaTeX Output ##########################################
 | |
| 
 | |
# Bodies of the one-argument LaTeX macros generated for each highlight
# category; '#1' stands for the highlighted text.  Raw strings are used so
# that '\c' and '\e' are not parsed as (invalid) string escape sequences.
default_latex_commands = {
    'comment': r'{\color{red}#1}',
    'string': r'{\color{ForestGreen}#1}',
    'docstring': r'{\emph{\color{ForestGreen}#1}}',
    'keyword': r'{\color{orange}#1}',
    'builtin': r'{\color{purple}#1}',
    'definition': r'{\color{orange}#1}',
    'defname': r'{\color{blue}#1}',
    'operator': r'{\color{brown}#1}',
}
 | |
| 
 | |
# LaTeX document template for %-formatting with a mapping; it has the
# placeholders %(macros)s, %(title)s and %(body)s.  The empty first and last
# items give the template its leading and trailing newline.
default_latex_document = '\n'.join([
    '',
    r'\documentclass{article}',
    r'\usepackage{alltt}',
    r'\usepackage{upquote}',
    r'\usepackage{color}',
    r'\usepackage[usenames,dvipsnames]{xcolor}',
    r'\usepackage[cm]{fullpage}',
    r'%(macros)s',
    r'\begin{document}',
    r'\center{\LARGE{%(title)s}}',
    r'\begin{alltt}',
    r'%(body)s',
    r'\end{alltt}',
    r'\end{document}',
    '',
])
 | |
| 
 | |
def alltt_escape(s):
    """Escape backslashes and braces in *s* for use inside an alltt block.

    Each character is translated once, so the braces of an inserted
    ``\\textbackslash{}`` are not escaped again.
    """
    replacements = {
        '\\': r'\textbackslash{}',
        '{': r'\{',
        '}': r'\}',
    }
    return ''.join(replacements.get(ch, ch) for ch in s)
 | |
| 
 | |
def latex_highlight(classified_text, title = 'python',
                    commands = default_latex_commands,
                    document = default_latex_document):
    """Create a complete LaTeX document with colorized source code.

    *commands* maps each category to the body of a one-argument macro; a
    ``\\py<category>`` macro is generated for every entry.  *document* is a
    %-format template with ``%(title)s``, ``%(macros)s`` and ``%(body)s``
    placeholders.  Text is escaped for the alltt environment; the title is
    inserted as given.
    """
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Use the caller-supplied template; the previous code always used the
    # module default and silently ignored the ``document`` argument.
    return document % dict(title=title, macros=macros, body=''.join(result))
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # Command-line driver: classify SOURCEFILE and emit it in the requested
    # format, either to stdout or (with -b) to an HTML file opened in a browser.
    import argparse
    import os
    import pathlib
    import sys
    import textwrap
    import webbrowser

    parser = argparse.ArgumentParser(
            description = 'Add syntax highlighting to Python source code',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog = textwrap.dedent('''
                examples:

                  # Show syntax highlighted code in the terminal window
                  $ ./highlight.py myfile.py

                  # Colorize myfile.py and display in a browser
                  $ ./highlight.py -b myfile.py

                  # Create an HTML section to embed in an existing webpage
                  $ ./highlight.py -s myfile.py

                  # Create a complete HTML file
                  $ ./highlight.py -c myfile.py > myfile.html

                  # Create a PDF using LaTeX
                  $ ./highlight.py -l myfile.py | pdflatex

            '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
            help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
            help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
            help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
            help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
            help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
            help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args()

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes the file using its PEP 263 coding cookie or BOM
    # (UTF-8 by default) instead of the platform's locale encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    # Precedence when several format flags are given: raw, full HTML page,
    # HTML section, LaTeX, and finally ANSI as the default.
    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        # The page is written to the current directory, named after the source.
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # The generated page declares charset=UTF-8, so write it as UTF-8
        # rather than in the locale's default encoding.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        # as_uri() builds a properly quoted file: URL (spaces, '#', drive
        # letters) where plain string concatenation would not.
        webbrowser.open(pathlib.Path(os.path.abspath(htmlfile)).as_uri())
    else:
        sys.stdout.write(encoded)
 | 
