mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 03:04:41 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			280 lines
		
	
	
	
		
			9.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			280 lines
		
	
	
	
		
			9.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| "Utility functions used by the btm_matcher module"
 | |
| 
 | |
| from . import pytree
 | |
| from .pgen2 import grammar, token
 | |
| from .pygram import pattern_symbols, python_symbols
 | |
| 
 | |
| syms = pattern_symbols
 | |
| pysyms = python_symbols
 | |
| tokens = grammar.opmap
 | |
| token_labels = token
 | |
| 
 | |
| TYPE_ANY = -1
 | |
| TYPE_ALTERNATIVES = -2
 | |
| TYPE_GROUP = -3
 | |
| 
 | |
| class MinNode(object):
 | |
|     """This class serves as an intermediate representation of the
 | |
|     pattern tree during the conversion to sets of leaf-to-root
 | |
|     subpatterns"""
 | |
| 
 | |
|     def __init__(self, type=None, name=None):
 | |
|         self.type = type
 | |
|         self.name = name
 | |
|         self.children = []
 | |
|         self.leaf = False
 | |
|         self.parent = None
 | |
|         self.alternatives = []
 | |
|         self.group = []
 | |
| 
 | |
|     def __repr__(self):
 | |
|         return str(self.type) + ' ' + str(self.name)
 | |
| 
 | |
|     def leaf_to_root(self):
 | |
|         """Internal method. Returns a characteristic path of the
 | |
|         pattern tree. This method must be run for all leaves until the
 | |
|         linear subpatterns are merged into a single"""
 | |
|         node = self
 | |
|         subp = []
 | |
|         while node:
 | |
|             if node.type == TYPE_ALTERNATIVES:
 | |
|                 node.alternatives.append(subp)
 | |
|                 if len(node.alternatives) == len(node.children):
 | |
|                     #last alternative
 | |
|                     subp = [tuple(node.alternatives)]
 | |
|                     node.alternatives = []
 | |
|                     node = node.parent
 | |
|                     continue
 | |
|                 else:
 | |
|                     node = node.parent
 | |
|                     subp = None
 | |
|                     break
 | |
| 
 | |
|             if node.type == TYPE_GROUP:
 | |
|                 node.group.append(subp)
 | |
|                 #probably should check the number of leaves
 | |
|                 if len(node.group) == len(node.children):
 | |
|                     subp = get_characteristic_subpattern(node.group)
 | |
|                     node.group = []
 | |
|                     node = node.parent
 | |
|                     continue
 | |
|                 else:
 | |
|                     node = node.parent
 | |
|                     subp = None
 | |
|                     break
 | |
| 
 | |
|             if node.type == token_labels.NAME and node.name:
 | |
|                 #in case of type=name, use the name instead
 | |
|                 subp.append(node.name)
 | |
|             else:
 | |
|                 subp.append(node.type)
 | |
| 
 | |
|             node = node.parent
 | |
|         return subp
 | |
| 
 | |
|     def get_linear_subpattern(self):
 | |
|         """Drives the leaf_to_root method. The reason that
 | |
|         leaf_to_root must be run multiple times is because we need to
 | |
|         reject 'group' matches; for example the alternative form
 | |
|         (a | b c) creates a group [b c] that needs to be matched. Since
 | |
|         matching multiple linear patterns overcomes the automaton's
 | |
|         capabilities, leaf_to_root merges each group into a single
 | |
|         choice based on 'characteristic'ity,
 | |
| 
 | |
|         i.e. (a|b c) -> (a|b) if b more characteristic than c
 | |
| 
 | |
|         Returns: The most 'characteristic'(as defined by
 | |
|           get_characteristic_subpattern) path for the compiled pattern
 | |
|           tree.
 | |
|         """
 | |
| 
 | |
|         for l in self.leaves():
 | |
|             subp = l.leaf_to_root()
 | |
|             if subp:
 | |
|                 return subp
 | |
| 
 | |
|     def leaves(self):
 | |
|         "Generator that returns the leaves of the tree"
 | |
|         for child in self.children:
 | |
|             yield from child.leaves()
 | |
|         if not self.children:
 | |
|             yield self
 | |
| 
 | |
| def reduce_tree(node, parent=None):
 | |
|     """
 | |
|     Internal function. Reduces a compiled pattern tree to an
 | |
|     intermediate representation suitable for feeding the
 | |
|     automaton. This also trims off any optional pattern elements(like
 | |
|     [a], a*).
 | |
|     """
 | |
| 
 | |
|     new_node = None
 | |
|     #switch on the node type
 | |
|     if node.type == syms.Matcher:
 | |
|         #skip
 | |
|         node = node.children[0]
 | |
| 
 | |
|     if node.type == syms.Alternatives  :
 | |
|         #2 cases
 | |
|         if len(node.children) <= 2:
 | |
|             #just a single 'Alternative', skip this node
 | |
|             new_node = reduce_tree(node.children[0], parent)
 | |
|         else:
 | |
|             #real alternatives
 | |
|             new_node = MinNode(type=TYPE_ALTERNATIVES)
 | |
|             #skip odd children('|' tokens)
 | |
|             for child in node.children:
 | |
|                 if node.children.index(child)%2:
 | |
|                     continue
 | |
|                 reduced = reduce_tree(child, new_node)
 | |
|                 if reduced is not None:
 | |
|                     new_node.children.append(reduced)
 | |
|     elif node.type == syms.Alternative:
 | |
|         if len(node.children) > 1:
 | |
| 
 | |
|             new_node = MinNode(type=TYPE_GROUP)
 | |
|             for child in node.children:
 | |
|                 reduced = reduce_tree(child, new_node)
 | |
|                 if reduced:
 | |
|                     new_node.children.append(reduced)
 | |
|             if not new_node.children:
 | |
|                 # delete the group if all of the children were reduced to None
 | |
|                 new_node = None
 | |
| 
 | |
|         else:
 | |
|             new_node = reduce_tree(node.children[0], parent)
 | |
| 
 | |
|     elif node.type == syms.Unit:
 | |
|         if (isinstance(node.children[0], pytree.Leaf) and
 | |
|             node.children[0].value == '('):
 | |
|             #skip parentheses
 | |
|             return reduce_tree(node.children[1], parent)
 | |
|         if ((isinstance(node.children[0], pytree.Leaf) and
 | |
|                node.children[0].value == '[')
 | |
|                or
 | |
|                (len(node.children)>1 and
 | |
|                hasattr(node.children[1], "value") and
 | |
|                node.children[1].value == '[')):
 | |
|             #skip whole unit if its optional
 | |
|             return None
 | |
| 
 | |
|         leaf = True
 | |
|         details_node = None
 | |
|         alternatives_node = None
 | |
|         has_repeater = False
 | |
|         repeater_node = None
 | |
|         has_variable_name = False
 | |
| 
 | |
|         for child in node.children:
 | |
|             if child.type == syms.Details:
 | |
|                 leaf = False
 | |
|                 details_node = child
 | |
|             elif child.type == syms.Repeater:
 | |
|                 has_repeater = True
 | |
|                 repeater_node = child
 | |
|             elif child.type == syms.Alternatives:
 | |
|                 alternatives_node = child
 | |
|             if hasattr(child, 'value') and child.value == '=': # variable name
 | |
|                 has_variable_name = True
 | |
| 
 | |
|         #skip variable name
 | |
|         if has_variable_name:
 | |
|             #skip variable name, '='
 | |
|             name_leaf = node.children[2]
 | |
|             if hasattr(name_leaf, 'value') and name_leaf.value == '(':
 | |
|                 # skip parenthesis
 | |
|                 name_leaf = node.children[3]
 | |
|         else:
 | |
|             name_leaf = node.children[0]
 | |
| 
 | |
|         #set node type
 | |
|         if name_leaf.type == token_labels.NAME:
 | |
|             #(python) non-name or wildcard
 | |
|             if name_leaf.value == 'any':
 | |
|                 new_node = MinNode(type=TYPE_ANY)
 | |
|             else:
 | |
|                 if hasattr(token_labels, name_leaf.value):
 | |
|                     new_node = MinNode(type=getattr(token_labels, name_leaf.value))
 | |
|                 else:
 | |
|                     new_node = MinNode(type=getattr(pysyms, name_leaf.value))
 | |
| 
 | |
|         elif name_leaf.type == token_labels.STRING:
 | |
|             #(python) name or character; remove the apostrophes from
 | |
|             #the string value
 | |
|             name = name_leaf.value.strip("'")
 | |
|             if name in tokens:
 | |
|                 new_node = MinNode(type=tokens[name])
 | |
|             else:
 | |
|                 new_node = MinNode(type=token_labels.NAME, name=name)
 | |
|         elif name_leaf.type == syms.Alternatives:
 | |
|             new_node = reduce_tree(alternatives_node, parent)
 | |
| 
 | |
|         #handle repeaters
 | |
|         if has_repeater:
 | |
|             if repeater_node.children[0].value == '*':
 | |
|                 #reduce to None
 | |
|                 new_node = None
 | |
|             elif repeater_node.children[0].value == '+':
 | |
|                 #reduce to a single occurrence i.e. do nothing
 | |
|                 pass
 | |
|             else:
 | |
|                 #TODO: handle {min, max} repeaters
 | |
|                 raise NotImplementedError
 | |
| 
 | |
|         #add children
 | |
|         if details_node and new_node is not None:
 | |
|             for child in details_node.children[1:-1]:
 | |
|                 #skip '<', '>' markers
 | |
|                 reduced = reduce_tree(child, new_node)
 | |
|                 if reduced is not None:
 | |
|                     new_node.children.append(reduced)
 | |
|     if new_node:
 | |
|         new_node.parent = parent
 | |
|     return new_node
 | |
| 
 | |
| 
 | |
| def get_characteristic_subpattern(subpatterns):
 | |
|     """Picks the most characteristic from a list of linear patterns
 | |
|     Current order used is:
 | |
|     names > common_names > common_chars
 | |
|     """
 | |
|     if not isinstance(subpatterns, list):
 | |
|         return subpatterns
 | |
|     if len(subpatterns)==1:
 | |
|         return subpatterns[0]
 | |
| 
 | |
|     # first pick out the ones containing variable names
 | |
|     subpatterns_with_names = []
 | |
|     subpatterns_with_common_names = []
 | |
|     common_names = ['in', 'for', 'if' , 'not', 'None']
 | |
|     subpatterns_with_common_chars = []
 | |
|     common_chars = "[]().,:"
 | |
|     for subpattern in subpatterns:
 | |
|         if any(rec_test(subpattern, lambda x: type(x) is str)):
 | |
|             if any(rec_test(subpattern,
 | |
|                             lambda x: isinstance(x, str) and x in common_chars)):
 | |
|                 subpatterns_with_common_chars.append(subpattern)
 | |
|             elif any(rec_test(subpattern,
 | |
|                               lambda x: isinstance(x, str) and x in common_names)):
 | |
|                 subpatterns_with_common_names.append(subpattern)
 | |
| 
 | |
|             else:
 | |
|                 subpatterns_with_names.append(subpattern)
 | |
| 
 | |
|     if subpatterns_with_names:
 | |
|         subpatterns = subpatterns_with_names
 | |
|     elif subpatterns_with_common_names:
 | |
|         subpatterns = subpatterns_with_common_names
 | |
|     elif subpatterns_with_common_chars:
 | |
|         subpatterns = subpatterns_with_common_chars
 | |
|     # of the remaining subpatterns pick out the longest one
 | |
|     return max(subpatterns, key=len)
 | |
| 
 | |
| def rec_test(sequence, test_func):
 | |
|     """Tests test_func on all items of sequence and items of included
 | |
|     sub-iterables"""
 | |
|     for x in sequence:
 | |
|         if isinstance(x, (list, tuple)):
 | |
|             yield from rec_test(x, test_func)
 | |
|         else:
 | |
|             yield test_func(x)
 | 
