mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
	
	
		
			337 lines
		
	
	
	
		
			9.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			337 lines
		
	
	
	
		
			9.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | """Parse a Python file and retrieve classes and methods.
 | ||
|  | 
 | ||
|  | Parse enough of a Python file to recognize class and method | ||
|  | definitions and to find out the superclasses of a class. | ||
|  | 
 | ||
The interface consists of two functions:
	readmodule(module, path)
	readmodule_ex(module, path)
module is the name of a Python module, path is an optional list of
directories in which to search for the module.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are instances of the class Class defined here.  readmodule_ex() also
includes top-level functions, as instances of the class Function;
readmodule() leaves those out.
|  | 
 | ||
A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
	module -- the name of the module that was being read
	name -- the name of the class
	super -- a list of super classes (Class instances)
	methods -- a dictionary of methods
	file -- the file in which the class was defined
	lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the methods were defined as values.
|  | If the name of a super class is not recognized, the corresponding | ||
|  | entry in the list of super classes is not a class instance but a | ||
|  | string giving the name of the super class.  Since import statements | ||
|  | are recognized and imported modules are scanned as well, this | ||
|  | shouldn't happen often. | ||
|  | 
 | ||
BUGS
- Continuation lines are not dealt with at all.
- While triple-quoted strings won't confuse it, lines that look like
  def, class, import or "from ... import" statements inside
  backslash-continued single-quoted strings are treated like code.
  The expense of stopping that isn't worth it.
- Code that doesn't pass tabnanny or python -t will confuse it, unless
  you set the module-level TABWIDTH variable (default 8) to the correct
  tab width for the file.
|  | 
 | ||
|  | PACKAGE RELATED BUGS | ||
|  | - If you have a package and a module inside that or another package | ||
|  |   with the same name, module caching doesn't work properly since the | ||
|  |   key is the base name of the module/package. | ||
|  | - The only entry that is returned when you readmodule a package is a | ||
|  |   __path__ whose value is a list which confuses certain class browsers. | ||
|  | - When code does: | ||
|  |   from package import subpackage | ||
|  |   class MyClass(subpackage.SuperClass): | ||
|  |     ... | ||
|  |   It can't locate the parent.  It probably needs to have the same | ||
|  |   hairy logic that the import locator already does.  (This logic | ||
|  |   exists coded in Python in the freeze package.) | ||
|  | """
 | ||
|  | 
 | ||
|  | import os | ||
|  | import sys | ||
|  | import imp | ||
|  | import re | ||
|  | import string | ||
|  | 
 | ||
|  | TABWIDTH = 8 | ||
|  | 
 | ||
|  | _getnext = re.compile(r"""
 | ||
|  |     (?P<String> | ||
|  |        \""" [^"\\]* (?:
 | ||
|  | 			(?: \\. | "(?!"") ) | ||
|  | 			[^"\\]* | ||
|  | 		    )* | ||
|  |        \"""
 | ||
|  | 
 | ||
|  |     |   ''' [^'\\]* (?:
 | ||
|  | 			(?: \\. | '(?!'') ) | ||
|  | 			[^'\\]* | ||
|  | 		    )* | ||
|  | 	'''
 | ||
|  |     ) | ||
|  | 
 | ||
|  | |   (?P<Method> | ||
|  | 	^ | ||
|  | 	(?P<MethodIndent> [ \t]* ) | ||
|  | 	def [ \t]+ | ||
|  | 	(?P<MethodName> [a-zA-Z_] \w* ) | ||
|  | 	[ \t]* \( | ||
|  |     ) | ||
|  | 
 | ||
|  | |   (?P<Class> | ||
|  | 	^ | ||
|  | 	(?P<ClassIndent> [ \t]* ) | ||
|  | 	class [ \t]+ | ||
|  | 	(?P<ClassName> [a-zA-Z_] \w* ) | ||
|  | 	[ \t]* | ||
|  | 	(?P<ClassSupers> \( [^)\n]* \) )? | ||
|  | 	[ \t]* : | ||
|  |     ) | ||
|  | 
 | ||
|  | |   (?P<Import> | ||
|  | 	^ import [ \t]+ | ||
|  | 	(?P<ImportList> [^#;\n]+ ) | ||
|  |     ) | ||
|  | 
 | ||
|  | |   (?P<ImportFrom> | ||
|  | 	^ from [ \t]+ | ||
|  | 	(?P<ImportFromPath> | ||
|  | 	    [a-zA-Z_] \w* | ||
|  | 	    (?: | ||
|  | 		[ \t]* \. [ \t]* [a-zA-Z_] \w* | ||
|  | 	    )* | ||
|  | 	) | ||
|  | 	[ \t]+ | ||
|  | 	import [ \t]+ | ||
|  | 	(?P<ImportFromList> [^#;\n]+ ) | ||
|  |     ) | ||
|  | """, re.VERBOSE | re.DOTALL | re.MULTILINE).search
 | ||
|  | 
 | ||
|  | _modules = {}                           # cache of modules we've seen | ||
|  | 
 | ||
# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.

    Instance variables:
        module  -- the module value passed to the constructor
        name    -- the name of the class
        super   -- list of super classes; an empty list when the
                   constructor is given None
        methods -- dictionary mapping method name to line number,
                   filled in through _addmethod()
        file    -- the file value passed to the constructor
        lineno  -- the line number passed to the constructor
    '''
    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        # None means "no superclasses"; give each instance its own list.
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method `name` is defined on line `lineno`.'''
        self.methods[name] = lineno
|  | 
 | ||
class Function(Class):
    '''Class to represent a top-level Python function.

    A Function is stored with the same attributes as a Class, but it
    never has superclasses (None is passed for them) and must never be
    given methods.
    '''
    def __init__(self, module, name, file, lineno):
        Class.__init__(self, module, name, None, file, lineno)

    def _addmethod(self, name, lineno):
        # Raised explicitly instead of "assert 0" so that the guard is
        # still active when Python runs with -O (which strips asserts).
        raise AssertionError("Function._addmethod() shouldn't be called")
|  | 
 | ||
def readmodule(module, path=None, inpackage=0):
    '''Backwards compatible interface.

    Like readmodule_ex() but strips Function objects from the
    resulting dictionary.

    module    -- name of the module to read
    path      -- optional list of directories handed on to
                 readmodule_ex(); None (the default) means no extra
                 directories
    inpackage -- handed on unchanged to readmodule_ex()

    Returns a new dictionary holding every entry of the
    readmodule_ex() result whose value is not a Function instance.
    '''
    # Use None as the default instead of a shared mutable list.
    if path is None:
        path = []

    found = readmodule_ex(module, path, inpackage)
    res = {}
    for key, value in found.items():
        if not isinstance(value, Function):
            res[key] = value
    return res
|  | 
 | ||
def readmodule_ex(module, path=None, inpackage=0):
    '''Read a module file and return a dictionary of classes and functions.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    (a Class instance) and each top-level function (a Function
    instance) found in the module.  Names brought in with
    "from X import Y" are included when X could be read.  For a
    package the dictionary also has a '__path__' entry.

    module    -- module name; may be dotted ("package.submodule")
    path      -- optional list of directories searched before sys.path;
                 None (the default) means no extra directories
    inpackage -- when true, PATH alone is tried first before falling
                 back to PATH + sys.path

    Results are cached in _modules under the undotted module name, and
    the same dictionary object is returned on later calls.

    Raises ImportError (from imp.find_module) when the module itself
    cannot be located, and KeyError when the parent of a dotted name has
    no '__path__' entry.  Failures while reading modules *imported by*
    the scanned source are ignored.
    '''
    if path is None:
        path = []

    result = {}

    i = module.rfind('.')
    if i >= 0:
        # Dotted module name: read the package, then look the submodule
        # up on the package's __path__.
        package = module[:i].strip()
        submodule = module[i+1:].strip()
        parent = readmodule_ex(package, path, inpackage)
        # Use readmodule_ex (not readmodule) so Function entries are kept,
        # exactly as they are for an undotted name.
        return readmodule_ex(submodule, parent['__path__'], 1)

    if module in _modules:
        # we've seen this module before...
        return _modules[module]
    if module in sys.builtin_module_names:
        # this is a built-in module
        _modules[module] = result
        return result

    # search the path for the module
    f = None
    if inpackage:
        try:
            f, fname, (suff, mode, kind) = imp.find_module(module, path)
        except ImportError:
            f = None
    if f is None:
        fullpath = list(path) + sys.path
        f, fname, (suff, mode, kind) = imp.find_module(module, fullpath)
    if kind == imp.PKG_DIRECTORY:
        # A package: remember its directory and parse its __init__.
        result['__path__'] = [fname]
        _modules[module] = result
        path = [fname] + list(path)
        f, fname, (suff, mode, kind) = imp.find_module('__init__', [fname])
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        _modules[module] = result
        return result

    # Register the (still empty) result before parsing so that a module
    # which is imported again while it is being scanned hits the cache
    # above instead of recursing forever.
    _modules[module] = result
    classstack = []     # stack of (class, indent) pairs
    try:
        src = f.read()
    finally:
        f.close()

    # To avoid having to stop the regexp at each newline, instead
    # when we need a line number we simply count the number of
    # newlines in the string since the last time we did this.
    lineno, last_lineno_pos = 1, 0
    i = 0
    while 1:
        m = _getnext(src, i)
        if not m:
            break
        start, i = m.span()

        if m.start("Method") >= 0:
            # found a method definition or function
            thisindent = _indent(m.group("MethodIndent"))
            meth_name = m.group("MethodName")
            lineno = lineno + src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                del classstack[-1]
            if classstack:
                # it's a class method
                classstack[-1][0]._addmethod(meth_name, lineno)
            else:
                # it's a function
                result[meth_name] = Function(module, meth_name,
                                             fname, lineno)

        elif m.start("String") >= 0:
            # triple-quoted string: matched only to be skipped
            pass

        elif m.start("Class") >= 0:
            # we found a class definition
            thisindent = _indent(m.group("ClassIndent"))
            # close all classes indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                del classstack[-1]
            lineno = lineno + src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            class_name = m.group("ClassName")
            inherit = m.group("ClassSupers")
            if inherit:
                # the class inherits from other classes
                names = []
                for n in inherit[1:-1].split(','):
                    n = n.strip()
                    if not n:
                        # "class C():" or a trailing comma -- there is
                        # no superclass name here
                        continue
                    if n in result:
                        # we know this super class
                        n = result[n]
                    else:
                        parts = n.split('.')
                        if len(parts) > 1:
                            # super class is of the form module.class:
                            # look in module for class
                            modname = parts[-2]
                            clsname = parts[-1]
                            if modname in _modules:
                                d = _modules[modname]
                                if clsname in d:
                                    n = d[clsname]
                    # unresolved names stay in the list as plain strings
                    names.append(n)
                inherit = names
            # remember this class
            cur_class = Class(module, class_name, inherit, fname, lineno)
            result[class_name] = cur_class
            classstack.append((cur_class, thisindent))

        elif m.start("Import") >= 0:
            # import module
            for n in m.group("ImportList").split(','):
                n = n.strip()
                try:
                    # recursively read the imported module (only for the
                    # side effect of filling the _modules cache)
                    readmodule(n, path, inpackage)
                except Exception:
                    # best effort: a module we cannot read is ignored
                    pass

        elif m.start("ImportFrom") >= 0:
            # from module import stuff
            mod = m.group("ImportFromPath")
            names = m.group("ImportFromList").split(',')
            try:
                # recursively read the imported module
                d = readmodule(mod, path, inpackage)
            except Exception:
                # best effort: a module we cannot read is ignored
                continue
            # add any classes that were defined in the
            # imported module to our name space if they
            # were mentioned in the list
            for n in names:
                n = n.strip()
                if n in d:
                    result[n] = d[n]
                elif n == '*':
                    # only add a name if not already there (to mimic
                    # what Python does internally); also don't add
                    # names that start with _
                    for key in d.keys():
                        if key[0] != '_' and key not in result:
                            result[key] = d[key]
        else:
            # explicit raise so the check survives python -O
            raise AssertionError(
                "regexp _getnext found something unexpected")

    return result
|  | 
 | ||
def _indent(ws, _expandtabs=None):
    '''Return the indentation column represented by the string ws.

    Tabs in ws are expanded using the module-level TABWIDTH and the
    length of the expanded string is returned.  _expandtabs is an
    optional callable taking (string, tabwidth); when it is omitted the
    string's own expandtabs() method is used.
    '''
    if _expandtabs is None:
        return len(ws.expandtabs(TABWIDTH))
    return len(_expandtabs(ws, TABWIDTH))