mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
	
	
		
			164 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			164 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | """A bottom-up tree matching algorithm implementation meant to speed
 | ||
|  | up 2to3's matching process. After the tree patterns are reduced to | ||
|  | their rarest linear path, a linear Aho-Corasick automaton is | ||
|  | created. The linear automaton traverses the linear paths from the | ||
|  | leaves to the root of the AST and returns a set of nodes for further | ||
|  | matching. This reduces significantly the number of candidate nodes."""
 | ||
|  | 
 | ||
|  | __author__ = "George Boutsioukis <gboutsioukis@gmail.com>" | ||
|  | 
 | ||
|  | import logging | ||
|  | import itertools | ||
|  | from collections import defaultdict | ||
|  | 
 | ||
|  | from . import pytree | ||
|  | from .btm_utils import reduce_tree | ||
|  | 
 | ||
|  | class BMNode(object): | ||
|  |     """Class for a node of the Aho-Corasick automaton used in matching""" | ||
|  |     count = itertools.count() | ||
|  |     def __init__(self): | ||
|  |         self.transition_table = {} | ||
|  |         self.fixers = [] | ||
|  |         self.id = next(BMNode.count) | ||
|  |         self.content = '' | ||
|  | 
 | ||
|  | class BottomMatcher(object): | ||
|  |     """The main matcher class. After instantiating the patterns should
 | ||
|  |     be added using the add_fixer method"""
 | ||
|  | 
 | ||
|  |     def __init__(self): | ||
|  |         self.match = set() | ||
|  |         self.root = BMNode() | ||
|  |         self.nodes = [self.root] | ||
|  |         self.fixers = [] | ||
|  |         self.logger = logging.getLogger("RefactoringTool") | ||
|  | 
 | ||
|  |     def add_fixer(self, fixer): | ||
|  |         """Reduces a fixer's pattern tree to a linear path and adds it
 | ||
|  |         to the matcher(a common Aho-Corasick automaton). The fixer is | ||
|  |         appended on the matching states and called when they are | ||
|  |         reached"""
 | ||
|  |         self.fixers.append(fixer) | ||
|  |         tree = reduce_tree(fixer.pattern_tree) | ||
|  |         linear = tree.get_linear_subpattern() | ||
|  |         match_nodes = self.add(linear, start=self.root) | ||
|  |         for match_node in match_nodes: | ||
|  |             match_node.fixers.append(fixer) | ||
|  | 
 | ||
|  |     def add(self, pattern, start): | ||
|  |         "Recursively adds a linear pattern to the AC automaton" | ||
|  |         #print("adding pattern", pattern, "to", start) | ||
|  |         if not pattern: | ||
|  |             #print("empty pattern") | ||
|  |             return [start] | ||
|  |         if isinstance(pattern[0], tuple): | ||
|  |             #alternatives | ||
|  |             #print("alternatives") | ||
|  |             match_nodes = [] | ||
|  |             for alternative in pattern[0]: | ||
|  |                 #add all alternatives, and add the rest of the pattern | ||
|  |                 #to each end node | ||
|  |                 end_nodes = self.add(alternative, start=start) | ||
|  |                 for end in end_nodes: | ||
|  |                     match_nodes.extend(self.add(pattern[1:], end)) | ||
|  |             return match_nodes | ||
|  |         else: | ||
|  |             #single token | ||
|  |             #not last | ||
|  |             if pattern[0] not in start.transition_table: | ||
|  |                 #transition did not exist, create new | ||
|  |                 next_node = BMNode() | ||
|  |                 start.transition_table[pattern[0]] = next_node | ||
|  |             else: | ||
|  |                 #transition exists already, follow | ||
|  |                 next_node = start.transition_table[pattern[0]] | ||
|  | 
 | ||
|  |             if pattern[1:]: | ||
|  |                 end_nodes = self.add(pattern[1:], start=next_node) | ||
|  |             else: | ||
|  |                 end_nodes = [next_node] | ||
|  |             return end_nodes | ||
|  | 
 | ||
|  |     def run(self, leaves): | ||
|  |         """The main interface with the bottom matcher. The tree is
 | ||
|  |         traversed from the bottom using the constructed | ||
|  |         automaton. Nodes are only checked once as the tree is | ||
|  |         retraversed. When the automaton fails, we give it one more | ||
|  |         shot(in case the above tree matches as a whole with the | ||
|  |         rejected leaf), then we break for the next leaf. There is the | ||
|  |         special case of multiple arguments(see code comments) where we | ||
|  |         recheck the nodes | ||
|  | 
 | ||
|  |         Args: | ||
|  |            The leaves of the AST tree to be matched | ||
|  | 
 | ||
|  |         Returns: | ||
|  |            A dictionary of node matches with fixers as the keys | ||
|  |         """
 | ||
|  |         current_ac_node = self.root | ||
|  |         results = defaultdict(list) | ||
|  |         for leaf in leaves: | ||
|  |             current_ast_node = leaf | ||
|  |             while current_ast_node: | ||
|  |                 current_ast_node.was_checked = True | ||
|  |                 for child in current_ast_node.children: | ||
|  |                     # multiple statements, recheck | ||
|  |                     if isinstance(child, pytree.Leaf) and child.value == ";": | ||
|  |                         current_ast_node.was_checked = False | ||
|  |                         break | ||
|  |                 if current_ast_node.type == 1: | ||
|  |                     #name | ||
|  |                     node_token = current_ast_node.value | ||
|  |                 else: | ||
|  |                     node_token = current_ast_node.type | ||
|  | 
 | ||
|  |                 if node_token in current_ac_node.transition_table: | ||
|  |                     #token matches | ||
|  |                     current_ac_node = current_ac_node.transition_table[node_token] | ||
|  |                     for fixer in current_ac_node.fixers: | ||
|  |                         results[fixer].append(current_ast_node) | ||
|  |                 else: | ||
|  |                     #matching failed, reset automaton | ||
|  |                     current_ac_node = self.root | ||
|  |                     if (current_ast_node.parent is not None | ||
|  |                         and current_ast_node.parent.was_checked): | ||
|  |                         #the rest of the tree upwards has been checked, next leaf | ||
|  |                         break | ||
|  | 
 | ||
|  |                     #recheck the rejected node once from the root | ||
|  |                     if node_token in current_ac_node.transition_table: | ||
|  |                         #token matches | ||
|  |                         current_ac_node = current_ac_node.transition_table[node_token] | ||
|  |                         for fixer in current_ac_node.fixers: | ||
|  |                             results[fixer].append(current_ast_node) | ||
|  | 
 | ||
|  |                 current_ast_node = current_ast_node.parent | ||
|  |         return results | ||
|  | 
 | ||
|  |     def print_ac(self): | ||
|  |         "Prints a graphviz diagram of the BM automaton(for debugging)" | ||
|  |         print("digraph g{") | ||
|  |         def print_node(node): | ||
|  |             for subnode_key in node.transition_table.keys(): | ||
|  |                 subnode = node.transition_table[subnode_key] | ||
|  |                 print("%d -> %d [label=%s] //%s" % | ||
|  |                       (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) | ||
|  |                 if subnode_key == 1: | ||
|  |                     print(subnode.content) | ||
|  |                 print_node(subnode) | ||
|  |         print_node(self.root) | ||
|  |         print("}") | ||
|  | 
 | ||
|  | # taken from pytree.py for debugging; only used by print_ac | ||
|  | _type_reprs = {} | ||
|  | def type_repr(type_num): | ||
|  |     global _type_reprs | ||
|  |     if not _type_reprs: | ||
|  |         from .pygram import python_symbols | ||
|  |         # printing tokens is possible but not as useful | ||
|  |         # from .pgen2 import token // token.__dict__.items(): | ||
|  |         for name, val in python_symbols.__dict__.items(): | ||
|  |             if type(val) == int: _type_reprs[val] = name | ||
|  |     return _type_reprs.setdefault(type_num, type_num) |