mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 11:14:33 +00:00 
			
		
		
		
	
		
			
	
	
		
			284 lines
		
	
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			284 lines
		
	
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | "Utility functions used by the btm_matcher module" | ||
|  | 
 | ||
|  | from . import pytree | ||
|  | from .pgen2 import grammar, token | ||
|  | from .pygram import pattern_symbols, python_symbols | ||
|  | 
 | ||
|  | syms = pattern_symbols | ||
|  | pysyms = python_symbols | ||
|  | tokens = grammar.opmap | ||
|  | token_labels = token | ||
|  | 
 | ||
|  | TYPE_ANY = -1 | ||
|  | TYPE_ALTERNATIVES = -2 | ||
|  | TYPE_GROUP = -3 | ||
|  | 
 | ||
|  | class MinNode(object): | ||
|  |     """This class serves as an intermediate representation of the
 | ||
|  |     pattern tree during the conversion to sets of leaf-to-root | ||
|  |     subpatterns"""
 | ||
|  | 
 | ||
|  |     def __init__(self, type=None, name=None): | ||
|  |         self.type = type | ||
|  |         self.name = name | ||
|  |         self.children = [] | ||
|  |         self.leaf = False | ||
|  |         self.parent = None | ||
|  |         self.alternatives = [] | ||
|  |         self.group = [] | ||
|  | 
 | ||
|  |     def __repr__(self): | ||
|  |         return str(self.type) + ' ' + str(self.name) | ||
|  | 
 | ||
|  |     def leaf_to_root(self): | ||
|  |         """Internal method. Returns a characteristic path of the
 | ||
|  |         pattern tree. This method must be run for all leaves until the | ||
|  |         linear subpatterns are merged into a single"""
 | ||
|  |         node = self | ||
|  |         subp = [] | ||
|  |         while node: | ||
|  |             if node.type == TYPE_ALTERNATIVES: | ||
|  |                 node.alternatives.append(subp) | ||
|  |                 if len(node.alternatives) == len(node.children): | ||
|  |                     #last alternative | ||
|  |                     subp = [tuple(node.alternatives)] | ||
|  |                     node.alternatives = [] | ||
|  |                     node = node.parent | ||
|  |                     continue | ||
|  |                 else: | ||
|  |                     node = node.parent | ||
|  |                     subp = None | ||
|  |                     break | ||
|  | 
 | ||
|  |             if node.type == TYPE_GROUP: | ||
|  |                 node.group.append(subp) | ||
|  |                 #probably should check the number of leaves | ||
|  |                 if len(node.group) == len(node.children): | ||
|  |                     subp = get_characteristic_subpattern(node.group) | ||
|  |                     node.group = [] | ||
|  |                     node = node.parent | ||
|  |                     continue | ||
|  |                 else: | ||
|  |                     node = node.parent | ||
|  |                     subp = None | ||
|  |                     break | ||
|  | 
 | ||
|  |             if node.type == token_labels.NAME and node.name: | ||
|  |                 #in case of type=name, use the name instead | ||
|  |                 subp.append(node.name) | ||
|  |             else: | ||
|  |                 subp.append(node.type) | ||
|  | 
 | ||
|  |             node = node.parent | ||
|  |         return subp | ||
|  | 
 | ||
|  |     def get_linear_subpattern(self): | ||
|  |         """Drives the leaf_to_root method. The reason that
 | ||
|  |         leaf_to_root must be run multiple times is because we need to | ||
|  |         reject 'group' matches; for example the alternative form | ||
|  |         (a | b c) creates a group [b c] that needs to be matched. Since | ||
|  |         matching multiple linear patterns overcomes the automaton's | ||
|  |         capabilities, leaf_to_root merges each group into a single | ||
|  |         choice based on 'characteristic'ity, | ||
|  | 
 | ||
|  |         i.e. (a|b c) -> (a|b) if b more characteristic than c | ||
|  | 
 | ||
|  |         Returns: The most 'characteristic'(as defined by | ||
|  |           get_characteristic_subpattern) path for the compiled pattern | ||
|  |           tree. | ||
|  |         """
 | ||
|  | 
 | ||
|  |         for l in self.leaves(): | ||
|  |             subp = l.leaf_to_root() | ||
|  |             if subp: | ||
|  |                 return subp | ||
|  | 
 | ||
|  |     def leaves(self): | ||
|  |         "Generator that returns the leaves of the tree" | ||
|  |         for child in self.children: | ||
|  |             for x in child.leaves(): | ||
|  |                 yield x | ||
|  |         if not self.children: | ||
|  |             yield self | ||
|  | 
 | ||
|  | def reduce_tree(node, parent=None): | ||
|  |     """
 | ||
|  |     Internal function. Reduces a compiled pattern tree to an | ||
|  |     intermediate representation suitable for feeding the | ||
|  |     automaton. This also trims off any optional pattern elements(like | ||
|  |     [a], a*). | ||
|  |     """
 | ||
|  | 
 | ||
|  |     new_node = None | ||
|  |     #switch on the node type | ||
|  |     if node.type == syms.Matcher: | ||
|  |         #skip | ||
|  |         node = node.children[0] | ||
|  | 
 | ||
|  |     if node.type == syms.Alternatives  : | ||
|  |         #2 cases | ||
|  |         if len(node.children) <= 2: | ||
|  |             #just a single 'Alternative', skip this node | ||
|  |             new_node = reduce_tree(node.children[0], parent) | ||
|  |         else: | ||
|  |             #real alternatives | ||
|  |             new_node = MinNode(type=TYPE_ALTERNATIVES) | ||
|  |             #skip odd children('|' tokens) | ||
|  |             for child in node.children: | ||
|  |                 if node.children.index(child)%2: | ||
|  |                     continue | ||
|  |                 reduced = reduce_tree(child, new_node) | ||
|  |                 if reduced is not None: | ||
|  |                     new_node.children.append(reduced) | ||
|  |     elif node.type == syms.Alternative: | ||
|  |         if len(node.children) > 1: | ||
|  | 
 | ||
|  |             new_node = MinNode(type=TYPE_GROUP) | ||
|  |             for child in node.children: | ||
|  |                 reduced = reduce_tree(child, new_node) | ||
|  |                 if reduced: | ||
|  |                     new_node.children.append(reduced) | ||
|  |             if not new_node.children: | ||
|  |                 # delete the group if all of the children were reduced to None | ||
|  |                 new_node = None | ||
|  | 
 | ||
|  |         else: | ||
|  |             new_node = reduce_tree(node.children[0], parent) | ||
|  | 
 | ||
|  |     elif node.type == syms.Unit: | ||
|  |         if (isinstance(node.children[0], pytree.Leaf) and | ||
|  |             node.children[0].value == '('): | ||
|  |             #skip parentheses | ||
|  |             return reduce_tree(node.children[1], parent) | ||
|  |         if ((isinstance(node.children[0], pytree.Leaf) and | ||
|  |                node.children[0].value == '[') | ||
|  |                or | ||
|  |                (len(node.children)>1 and | ||
|  |                hasattr(node.children[1], "value") and | ||
|  |                node.children[1].value == '[')): | ||
|  |             #skip whole unit if its optional | ||
|  |             return None | ||
|  | 
 | ||
|  |         leaf = True | ||
|  |         details_node = None | ||
|  |         alternatives_node = None | ||
|  |         has_repeater = False | ||
|  |         repeater_node = None | ||
|  |         has_variable_name = False | ||
|  | 
 | ||
|  |         for child in node.children: | ||
|  |             if child.type == syms.Details: | ||
|  |                 leaf = False | ||
|  |                 details_node = child | ||
|  |             elif child.type == syms.Repeater: | ||
|  |                 has_repeater = True | ||
|  |                 repeater_node = child | ||
|  |             elif child.type == syms.Alternatives: | ||
|  |                 alternatives_node = child | ||
|  |             if hasattr(child, 'value') and child.value == '=': # variable name | ||
|  |                 has_variable_name = True | ||
|  | 
 | ||
|  |         #skip variable name | ||
|  |         if has_variable_name: | ||
|  |             #skip variable name, '=' | ||
|  |             name_leaf = node.children[2] | ||
|  |             if hasattr(name_leaf, 'value') and name_leaf.value == '(': | ||
|  |                 # skip parenthesis | ||
|  |                 name_leaf = node.children[3] | ||
|  |         else: | ||
|  |             name_leaf = node.children[0] | ||
|  | 
 | ||
|  |         #set node type | ||
|  |         if name_leaf.type == token_labels.NAME: | ||
|  |             #(python) non-name or wildcard | ||
|  |             if name_leaf.value == 'any': | ||
|  |                 new_node = MinNode(type=TYPE_ANY) | ||
|  |             else: | ||
|  |                 if hasattr(token_labels, name_leaf.value): | ||
|  |                     new_node = MinNode(type=getattr(token_labels, name_leaf.value)) | ||
|  |                 else: | ||
|  |                     new_node = MinNode(type=getattr(pysyms, name_leaf.value)) | ||
|  | 
 | ||
|  |         elif name_leaf.type == token_labels.STRING: | ||
|  |             #(python) name or character; remove the apostrophes from | ||
|  |             #the string value | ||
|  |             name = name_leaf.value.strip("'") | ||
|  |             if name in tokens: | ||
|  |                 new_node = MinNode(type=tokens[name]) | ||
|  |             else: | ||
|  |                 new_node = MinNode(type=token_labels.NAME, name=name) | ||
|  |         elif name_leaf.type == syms.Alternatives: | ||
|  |             new_node = reduce_tree(alternatives_node, parent) | ||
|  | 
 | ||
|  |         #handle repeaters | ||
|  |         if has_repeater: | ||
|  |             if repeater_node.children[0].value == '*': | ||
|  |                 #reduce to None | ||
|  |                 new_node = None | ||
|  |             elif repeater_node.children[0].value == '+': | ||
|  |                 #reduce to a single occurence i.e. do nothing | ||
|  |                 pass | ||
|  |             else: | ||
|  |                 #TODO: handle {min, max} repeaters | ||
|  |                 raise NotImplementedError | ||
|  |                 pass | ||
|  | 
 | ||
|  |         #add children | ||
|  |         if details_node and new_node is not None: | ||
|  |             for child in details_node.children[1:-1]: | ||
|  |                 #skip '<', '>' markers | ||
|  |                 reduced = reduce_tree(child, new_node) | ||
|  |                 if reduced is not None: | ||
|  |                     new_node.children.append(reduced) | ||
|  |     if new_node: | ||
|  |         new_node.parent = parent | ||
|  |     return new_node | ||
|  | 
 | ||
|  | 
 | ||
|  | def get_characteristic_subpattern(subpatterns): | ||
|  |     """Picks the most characteristic from a list of linear patterns
 | ||
|  |     Current order used is: | ||
|  |     names > common_names > common_chars | ||
|  |     """
 | ||
|  |     if not isinstance(subpatterns, list): | ||
|  |         return subpatterns | ||
|  |     if len(subpatterns)==1: | ||
|  |         return subpatterns[0] | ||
|  | 
 | ||
|  |     # first pick out the ones containing variable names | ||
|  |     subpatterns_with_names = [] | ||
|  |     subpatterns_with_common_names = [] | ||
|  |     common_names = ['in', 'for', 'if' , 'not', 'None'] | ||
|  |     subpatterns_with_common_chars = [] | ||
|  |     common_chars = "[]().,:" | ||
|  |     for subpattern in subpatterns: | ||
|  |         if any(rec_test(subpattern, lambda x: type(x) is str)): | ||
|  |             if any(rec_test(subpattern, | ||
|  |                             lambda x: isinstance(x, str) and x in common_chars)): | ||
|  |                 subpatterns_with_common_chars.append(subpattern) | ||
|  |             elif any(rec_test(subpattern, | ||
|  |                               lambda x: isinstance(x, str) and x in common_names)): | ||
|  |                 subpatterns_with_common_names.append(subpattern) | ||
|  | 
 | ||
|  |             else: | ||
|  |                 subpatterns_with_names.append(subpattern) | ||
|  | 
 | ||
|  |     if subpatterns_with_names: | ||
|  |         subpatterns = subpatterns_with_names | ||
|  |     elif subpatterns_with_common_names: | ||
|  |         subpatterns = subpatterns_with_common_names | ||
|  |     elif subpatterns_with_common_chars: | ||
|  |         subpatterns = subpatterns_with_common_chars | ||
|  |     # of the remaining subpatterns pick out the longest one | ||
|  |     return max(subpatterns, key=len) | ||
|  | 
 | ||
|  | def rec_test(sequence, test_func): | ||
|  |     """Tests test_func on all items of sequence and items of included
 | ||
|  |     sub-iterables"""
 | ||
|  |     for x in sequence: | ||
|  |         if isinstance(x, (list, tuple)): | ||
|  |             for y in rec_test(x, test_func): | ||
|  |                 yield y | ||
|  |         else: | ||
|  |             yield test_func(x) |