mirror of
				https://github.com/python/cpython.git
				synced 2025-10-24 18:33:49 +00:00 
			
		
		
		
	 b0871cac11
			
		
	
	
		b0871cac11
		
	
	
	
	
		
			
			svn+ssh://pythondev@svn.python.org/python/branches/py3k
................
  r85510 | benjamin.peterson | 2010-10-14 18:00:04 -0500 (Thu, 14 Oct 2010) | 61 lines
  Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
  ........
    r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line
    wrap with parens
  ........
    r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line
    use parens
  ........
    r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line
    things which use touch_import should be pre order
  ........
    r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines
    This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers.
  ........
    r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line
    allow star_expr in testlist_gexp
  ........
    r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line
    wrap long line
  ........
    r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line
    cleanup
  ........
    r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line
    when there's a None value and a traceback, don't call type with it #9661
  ........
    r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines
    Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.
  ........
    r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line
    fix urllib fixer with multiple as imports on a line #10069
  ........
    r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line
    stop abusing docstrings
  ........
    r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line
    kill sibling import
  ........
    r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line
    remove trailing whitespace
  ........
    r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line
    typo
  ........
................
		
	
			
		
			
				
	
	
		
			283 lines
		
	
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| "Utility functions used by the btm_matcher module"
 | |
| 
 | |
| from . import pytree
 | |
| from .pgen2 import grammar, token
 | |
| from .pygram import pattern_symbols, python_symbols
 | |
| 
 | |
| syms = pattern_symbols
 | |
| pysyms = python_symbols
 | |
| tokens = grammar.opmap
 | |
| token_labels = token
 | |
| 
 | |
| TYPE_ANY = -1
 | |
| TYPE_ALTERNATIVES = -2
 | |
| TYPE_GROUP = -3
 | |
| 
 | |
| class MinNode(object):
 | |
|     """This class serves as an intermediate representation of the
 | |
|     pattern tree during the conversion to sets of leaf-to-root
 | |
|     subpatterns"""
 | |
| 
 | |
|     def __init__(self, type=None, name=None):
 | |
|         self.type = type
 | |
|         self.name = name
 | |
|         self.children = []
 | |
|         self.leaf = False
 | |
|         self.parent = None
 | |
|         self.alternatives = []
 | |
|         self.group = []
 | |
| 
 | |
|     def __repr__(self):
 | |
|         return str(self.type) + ' ' + str(self.name)
 | |
| 
 | |
|     def leaf_to_root(self):
 | |
|         """Internal method. Returns a characteristic path of the
 | |
|         pattern tree. This method must be run for all leaves until the
 | |
|         linear subpatterns are merged into a single"""
 | |
|         node = self
 | |
|         subp = []
 | |
|         while node:
 | |
|             if node.type == TYPE_ALTERNATIVES:
 | |
|                 node.alternatives.append(subp)
 | |
|                 if len(node.alternatives) == len(node.children):
 | |
|                     #last alternative
 | |
|                     subp = [tuple(node.alternatives)]
 | |
|                     node.alternatives = []
 | |
|                     node = node.parent
 | |
|                     continue
 | |
|                 else:
 | |
|                     node = node.parent
 | |
|                     subp = None
 | |
|                     break
 | |
| 
 | |
|             if node.type == TYPE_GROUP:
 | |
|                 node.group.append(subp)
 | |
|                 #probably should check the number of leaves
 | |
|                 if len(node.group) == len(node.children):
 | |
|                     subp = get_characteristic_subpattern(node.group)
 | |
|                     node.group = []
 | |
|                     node = node.parent
 | |
|                     continue
 | |
|                 else:
 | |
|                     node = node.parent
 | |
|                     subp = None
 | |
|                     break
 | |
| 
 | |
|             if node.type == token_labels.NAME and node.name:
 | |
|                 #in case of type=name, use the name instead
 | |
|                 subp.append(node.name)
 | |
|             else:
 | |
|                 subp.append(node.type)
 | |
| 
 | |
|             node = node.parent
 | |
|         return subp
 | |
| 
 | |
|     def get_linear_subpattern(self):
 | |
|         """Drives the leaf_to_root method. The reason that
 | |
|         leaf_to_root must be run multiple times is because we need to
 | |
|         reject 'group' matches; for example the alternative form
 | |
|         (a | b c) creates a group [b c] that needs to be matched. Since
 | |
|         matching multiple linear patterns overcomes the automaton's
 | |
|         capabilities, leaf_to_root merges each group into a single
 | |
|         choice based on 'characteristic'ity,
 | |
| 
 | |
|         i.e. (a|b c) -> (a|b) if b more characteristic than c
 | |
| 
 | |
|         Returns: The most 'characteristic'(as defined by
 | |
|           get_characteristic_subpattern) path for the compiled pattern
 | |
|           tree.
 | |
|         """
 | |
| 
 | |
|         for l in self.leaves():
 | |
|             subp = l.leaf_to_root()
 | |
|             if subp:
 | |
|                 return subp
 | |
| 
 | |
|     def leaves(self):
 | |
|         "Generator that returns the leaves of the tree"
 | |
|         for child in self.children:
 | |
|             for x in child.leaves():
 | |
|                 yield x
 | |
|         if not self.children:
 | |
|             yield self
 | |
| 
 | |
| def reduce_tree(node, parent=None):
 | |
|     """
 | |
|     Internal function. Reduces a compiled pattern tree to an
 | |
|     intermediate representation suitable for feeding the
 | |
|     automaton. This also trims off any optional pattern elements(like
 | |
|     [a], a*).
 | |
|     """
 | |
| 
 | |
|     new_node = None
 | |
|     #switch on the node type
 | |
|     if node.type == syms.Matcher:
 | |
|         #skip
 | |
|         node = node.children[0]
 | |
| 
 | |
|     if node.type == syms.Alternatives  :
 | |
|         #2 cases
 | |
|         if len(node.children) <= 2:
 | |
|             #just a single 'Alternative', skip this node
 | |
|             new_node = reduce_tree(node.children[0], parent)
 | |
|         else:
 | |
|             #real alternatives
 | |
|             new_node = MinNode(type=TYPE_ALTERNATIVES)
 | |
|             #skip odd children('|' tokens)
 | |
|             for child in node.children:
 | |
|                 if node.children.index(child)%2:
 | |
|                     continue
 | |
|                 reduced = reduce_tree(child, new_node)
 | |
|                 if reduced is not None:
 | |
|                     new_node.children.append(reduced)
 | |
|     elif node.type == syms.Alternative:
 | |
|         if len(node.children) > 1:
 | |
| 
 | |
|             new_node = MinNode(type=TYPE_GROUP)
 | |
|             for child in node.children:
 | |
|                 reduced = reduce_tree(child, new_node)
 | |
|                 if reduced:
 | |
|                     new_node.children.append(reduced)
 | |
|             if not new_node.children:
 | |
|                 # delete the group if all of the children were reduced to None
 | |
|                 new_node = None
 | |
| 
 | |
|         else:
 | |
|             new_node = reduce_tree(node.children[0], parent)
 | |
| 
 | |
|     elif node.type == syms.Unit:
 | |
|         if (isinstance(node.children[0], pytree.Leaf) and
 | |
|             node.children[0].value == '('):
 | |
|             #skip parentheses
 | |
|             return reduce_tree(node.children[1], parent)
 | |
|         if ((isinstance(node.children[0], pytree.Leaf) and
 | |
|                node.children[0].value == '[')
 | |
|                or
 | |
|                (len(node.children)>1 and
 | |
|                hasattr(node.children[1], "value") and
 | |
|                node.children[1].value == '[')):
 | |
|             #skip whole unit if its optional
 | |
|             return None
 | |
| 
 | |
|         leaf = True
 | |
|         details_node = None
 | |
|         alternatives_node = None
 | |
|         has_repeater = False
 | |
|         repeater_node = None
 | |
|         has_variable_name = False
 | |
| 
 | |
|         for child in node.children:
 | |
|             if child.type == syms.Details:
 | |
|                 leaf = False
 | |
|                 details_node = child
 | |
|             elif child.type == syms.Repeater:
 | |
|                 has_repeater = True
 | |
|                 repeater_node = child
 | |
|             elif child.type == syms.Alternatives:
 | |
|                 alternatives_node = child
 | |
|             if hasattr(child, 'value') and child.value == '=': # variable name
 | |
|                 has_variable_name = True
 | |
| 
 | |
|         #skip variable name
 | |
|         if has_variable_name:
 | |
|             #skip variable name, '='
 | |
|             name_leaf = node.children[2]
 | |
|             if hasattr(name_leaf, 'value') and name_leaf.value == '(':
 | |
|                 # skip parenthesis
 | |
|                 name_leaf = node.children[3]
 | |
|         else:
 | |
|             name_leaf = node.children[0]
 | |
| 
 | |
|         #set node type
 | |
|         if name_leaf.type == token_labels.NAME:
 | |
|             #(python) non-name or wildcard
 | |
|             if name_leaf.value == 'any':
 | |
|                 new_node = MinNode(type=TYPE_ANY)
 | |
|             else:
 | |
|                 if hasattr(token_labels, name_leaf.value):
 | |
|                     new_node = MinNode(type=getattr(token_labels, name_leaf.value))
 | |
|                 else:
 | |
|                     new_node = MinNode(type=getattr(pysyms, name_leaf.value))
 | |
| 
 | |
|         elif name_leaf.type == token_labels.STRING:
 | |
|             #(python) name or character; remove the apostrophes from
 | |
|             #the string value
 | |
|             name = name_leaf.value.strip("'")
 | |
|             if name in tokens:
 | |
|                 new_node = MinNode(type=tokens[name])
 | |
|             else:
 | |
|                 new_node = MinNode(type=token_labels.NAME, name=name)
 | |
|         elif name_leaf.type == syms.Alternatives:
 | |
|             new_node = reduce_tree(alternatives_node, parent)
 | |
| 
 | |
|         #handle repeaters
 | |
|         if has_repeater:
 | |
|             if repeater_node.children[0].value == '*':
 | |
|                 #reduce to None
 | |
|                 new_node = None
 | |
|             elif repeater_node.children[0].value == '+':
 | |
|                 #reduce to a single occurence i.e. do nothing
 | |
|                 pass
 | |
|             else:
 | |
|                 #TODO: handle {min, max} repeaters
 | |
|                 raise NotImplementedError
 | |
|                 pass
 | |
| 
 | |
|         #add children
 | |
|         if details_node and new_node is not None:
 | |
|             for child in details_node.children[1:-1]:
 | |
|                 #skip '<', '>' markers
 | |
|                 reduced = reduce_tree(child, new_node)
 | |
|                 if reduced is not None:
 | |
|                     new_node.children.append(reduced)
 | |
|     if new_node:
 | |
|         new_node.parent = parent
 | |
|     return new_node
 | |
| 
 | |
| 
 | |
| def get_characteristic_subpattern(subpatterns):
 | |
|     """Picks the most characteristic from a list of linear patterns
 | |
|     Current order used is:
 | |
|     names > common_names > common_chars
 | |
|     """
 | |
|     if not isinstance(subpatterns, list):
 | |
|         return subpatterns
 | |
|     if len(subpatterns)==1:
 | |
|         return subpatterns[0]
 | |
| 
 | |
|     # first pick out the ones containing variable names
 | |
|     subpatterns_with_names = []
 | |
|     subpatterns_with_common_names = []
 | |
|     common_names = ['in', 'for', 'if' , 'not', 'None']
 | |
|     subpatterns_with_common_chars = []
 | |
|     common_chars = "[]().,:"
 | |
|     for subpattern in subpatterns:
 | |
|         if any(rec_test(subpattern, lambda x: type(x) is str)):
 | |
|             if any(rec_test(subpattern,
 | |
|                             lambda x: isinstance(x, str) and x in common_chars)):
 | |
|                 subpatterns_with_common_chars.append(subpattern)
 | |
|             elif any(rec_test(subpattern,
 | |
|                               lambda x: isinstance(x, str) and x in common_names)):
 | |
|                 subpatterns_with_common_names.append(subpattern)
 | |
| 
 | |
|             else:
 | |
|                 subpatterns_with_names.append(subpattern)
 | |
| 
 | |
|     if subpatterns_with_names:
 | |
|         subpatterns = subpatterns_with_names
 | |
|     elif subpatterns_with_common_names:
 | |
|         subpatterns = subpatterns_with_common_names
 | |
|     elif subpatterns_with_common_chars:
 | |
|         subpatterns = subpatterns_with_common_chars
 | |
|     # of the remaining subpatterns pick out the longest one
 | |
|     return max(subpatterns, key=len)
 | |
| 
 | |
| def rec_test(sequence, test_func):
 | |
|     """Tests test_func on all items of sequence and items of included
 | |
|     sub-iterables"""
 | |
|     for x in sequence:
 | |
|         if isinstance(x, (list, tuple)):
 | |
|             for y in rec_test(x, test_func):
 | |
|                 yield y
 | |
|         else:
 | |
|             yield test_func(x)
 |