mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
	
	
		
			155 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			155 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								"""Module to analyze Python source code; for syntax coloring tools.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Interface:
							 | 
						||
| 
								 | 
							
									tags = fontify(pytext, searchfrom, searchto)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								The 'pytext' argument is a string containing Python source code.
							 | 
						||
| 
								 | 
							
								The optional arguments 'searchfrom' and 'searchto' are indices into pytext that delimit the slice to scan; by default the whole text is scanned.
							 | 
						||
| 
								 | 
							
								The returned value is a list of tuples, formatted like this:
							 | 
						||
| 
								 | 
							
									[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
							 | 
						||
| 
								 | 
							
								The tuple contents are always like this:
							 | 
						||
| 
								 | 
							
									(tag, startindex, endindex, sublist)
							 | 
						||
| 
								 | 
							
								tag is one of 'keyword', 'string', 'comment' or 'identifier'
							 | 
						||
| 
								 | 
							
								sublist is not used, hence always None. 
							 | 
						||
| 
								 | 
							
								"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Based on FontText.py by Mitchell S. Chapman,
							 | 
						||
| 
								 | 
							
								# which was modified by Zachary Roadhouse,
							 | 
						||
| 
								 | 
							
								# then un-Tk'd by Just van Rossum.
							 | 
						||
| 
								 | 
							
								# Many thanks for regular expression debugging & authoring are due to:
							 | 
						||
| 
								 | 
							
								#	Tim (the-incredib-ly y'rs) Peters and Christian Tismer
							 | 
						||
| 
								 | 
							
								# So, who owns the copyright? ;-) How about this:
							 | 
						||
| 
								 | 
							
								# Copyright 1996-1997: 
							 | 
						||
| 
								 | 
							
								#	Mitchell S. Chapman,
							 | 
						||
| 
								 | 
							
								#	Zachary Roadhouse,
							 | 
						||
| 
								 | 
							
								#	Tim Peters,
							 | 
						||
| 
								 | 
							
								#	Just van Rossum
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								__version__ = "0.3.1"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import string, regex
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# First a little helper, since I don't like to repeat things. (Tismer speaking)
							 | 
						||
| 
								 | 
							
								import string
							 | 
						||
| 
								 | 
							
								def replace(where, what, with_):
									"""Return 'where' with every occurrence of 'what' replaced by 'with_'.

									The string is split on the separator 'what' and the pieces are
									joined again with 'with_' between them.

									The third parameter used to be spelled 'with'.  That name has been
									a reserved word since Python 2.6, which made this definition a
									syntax error, so it is renamed here.  Positional calls are
									unaffected.
									"""
									return with_.join(where.split(what))
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# This list of keywords is taken from ref/node13.html of the
								# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
								# "exec" is a reserved word as well but was missing from the list,
								# so exec statements were never tagged as keywords.
								keywordsList = [
									"assert", "exec",
									"del", "from", "lambda", "return",
									"and", "elif", "global", "not", "try",
									"break", "else", "if", "or", "while",
									"class", "except", "import", "pass",
									"continue", "finally", "in", "print",
									"def", "for", "is", "raise"]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Build up a regular expression which will match anything
								# interesting, including multi-line triple-quoted strings.
								# NOTE(review): the patterns below are written for the old 'regex'
								# module, where \( \) group and \| separates alternatives -- confirm
								# against that module's documentation before porting.

								# A comment: '#' followed by whatever '.*' matches.
								commentPat = "#.*"

								# Template for a one-line quoted string.  Every 'q' is a placeholder
								# for the quote character and is filled in by replace() below.
								# As read from the pattern: after the opening quote it accepts
								# characters other than backslash, the quote and newline, interleaved
								# with backslash-plus-any-character pairs, up to the closing quote.
								pat = "q[^\q\n]*\(\\\\[\000-\377][^\q\n]*\)*q"
								# Instantiate the template once for ' and once for ".
								quotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Way to go, Tim!
								# Template for a triple-quoted string.  As above, 'q' is a placeholder
								# for the quote character.  The template is laid out over several
								# lines for readability only; all whitespace is removed again right
								# after the literal.  As read from the pattern: between the opening
								# and closing 'qqq' it accepts backslash-plus-any-character pairs,
								# characters other than backslash and the quote, and runs of one or
								# two quotes that are followed by something other than a quote.
								pat = """
									qqq
									[^\\q]*
									\(
										\(	\\\\[\000-\377]
										\|	q
											\(	\\\\[\000-\377]
											\|	[^\\q]
											\|	q
												\(	\\\\[\000-\377]
												\|	[^\\q]
												\)
											\)
										\)
										[^\\q]*
									\)*
									qqq
								"""
								pat = string.join(string.split(pat), '')	# get rid of whitespace
								# Instantiate the template once for ''' and once for """.
								tripleQuotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Build up a regular expression which matches all and only
								# Python keywords. This will let us skip the uninteresting
								# identifier references.
								# nonKeyPat identifies characters which may legally precede
								# (or follow) a keyword pattern: either the position matched by '^'
								# or one character that is not a letter, digit, underscore, dot or
								# quote.
								nonKeyPat = "\(^\|[^a-zA-Z0-9_.\"']\)"

								# keyPat is: nonKeyPat \( kw1 \| kw2 \| ... \) nonKeyPat
								keyPat = nonKeyPat + "\("
								for keyword in keywordsList:
									keyPat = keyPat + keyword + "\|"
								# Drop the two characters of the trailing "\|" before closing the group.
								keyPat = keyPat[:-2] + "\)" + nonKeyPat

								# commentPat has to be tried before keyPat.  '#' is itself a character
								# that nonKeyPat accepts, so with keyPat first a comment such as
								# "#if x" would be consumed as the keyword match "#if " and only
								# those four characters would be handed to fontify().  commentPat
								# contains no groups, so moving it does not renumber any group.
								matchPat = commentPat + "\|" + keyPat + "\|" + tripleQuotePat + "\|" + quotePat
								matchRE = regex.compile(matchPat)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Pattern for the name that follows a defining keyword; fontify() uses
								# it to look ahead after 'def' and 'class'.  Optional blanks/tabs come
								# first and are part of the match; dots are accepted inside the name.
								idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"	# Ident w. leading whitespace.
								idRE = regex.compile(idKeyPat)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def fontify(pytext, searchfrom = 0, searchto = None):
									"""Return a list of (tag, start, end, None) tuples for 'pytext'.

									Scanning starts at index 'searchfrom' and stops at the first match
									that begins at or beyond 'searchto' (default: len(pytext)).  Each
									tuple holds one of the tags 'keyword', 'string', 'comment' or
									'identifier', followed by the slice bounds of the tagged text in
									pytext; the fourth element is always None.

									An 'identifier' tag is emitted for the name that directly follows
									a 'def' or 'class' keyword.  Its span starts right after the
									keyword, so it includes the blanks in front of the name.
									"""
									if searchto is None:
										searchto = len(pytext)
									# Cache a few attributes for quicker reference.
									search = matchRE.search
									group = matchRE.group
									idSearch = idRE.search
									idGroup = idRE.group

									tags = []
									tags_append = tags.append
									commentTag = 'comment'
									stringTag = 'string'
									keywordTag = 'keyword'
									identifierTag = 'identifier'

									start = 0
									end = searchfrom
									while 1:
										# search() yields the index of the next match, negative if none.
										start = search(pytext, end)
										if start < 0 or start >= searchto:
											break	# EXIT LOOP
										match = group(0)
										end = start + len(match)
										c = match[0]
										if c not in "#'\"":
											# Must have matched a keyword.  keyPat wraps the keyword in
											# nonKeyPat on both sides, so the match may carry one
											# separator character in front and carries one behind.
											if not (c == "_" or "a" <= c <= "z" or "A" <= c <= "Z"):
												# A separator precedes the keyword; strip it.  The
												# character itself is tested (nonKeyPat never consumes a
												# letter or underscore) instead of comparing 'start'
												# with 'searchfrom': a text that begins with a blank or
												# newline before its first keyword also matches at
												# 'searchfrom', and the separator used to end up inside
												# the keyword tag.
												match = match[1:]
												start = start + 1
											# Strip the separator behind the keyword and give it back
											# to the scanner, so it can start the next match.
											match = match[:-1]
											end = end - 1
											tags_append((keywordTag, start, end, None))
											# If this was a defining keyword, look ahead to the
											# following identifier.
											if match in ["def", "class"]:
												start = idSearch(pytext, end)
												# Only accept a name that begins right after the keyword.
												if start == end:
													match = idGroup(0)
													end = start + len(match)
													tags_append((identifierTag, start, end, None))
										elif c == "#":
											tags_append((commentTag, start, end, None))
										else:
											tags_append((stringTag, start, end, None))
									return tags
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def test(path):
									"""Run fontify() over the file at 'path' and print every tag found.

									One line is printed per tag: the tag name, a space, and the repr
									of the tagged slice of the file's text.
									"""
									f = open(path)
									try:
										text = f.read()
									finally:
										# Close the file even when read() raises.
										f.close()
									tags = fontify(text)
									for tag, start, end, sublist in tags:
										# A single parenthesised, pre-formatted string prints the same
										# text as the old "print tag, `...`" statement and is valid
										# with print as a statement or as a function.
										print("%s %s" % (tag, repr(text[start:end])))
							 |