+++ /dev/null
-"""A bottom-up tree matching algorithm implementation meant to speed\r
-up 2to3's matching process. After the tree patterns are reduced to\r
-their rarest linear path, a linear Aho-Corasick automaton is\r
-created. The linear automaton traverses the linear paths from the\r
-leaves to the root of the AST and returns a set of nodes for further\r
-matching. This reduces significantly the number of candidate nodes."""\r
-\r
-__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"\r
-\r
-import logging\r
-import itertools\r
-from collections import defaultdict\r
-\r
-from . import pytree\r
-from .btm_utils import reduce_tree\r
-\r
-class BMNode(object):\r
- """Class for a node of the Aho-Corasick automaton used in matching"""\r
- count = itertools.count()\r
- def __init__(self):\r
- self.transition_table = {}\r
- self.fixers = []\r
- self.id = next(BMNode.count)\r
- self.content = ''\r
-\r
-class BottomMatcher(object):\r
- """The main matcher class. After instantiating the patterns should\r
- be added using the add_fixer method"""\r
-\r
- def __init__(self):\r
- self.match = set()\r
- self.root = BMNode()\r
- self.nodes = [self.root]\r
- self.fixers = []\r
- self.logger = logging.getLogger("RefactoringTool")\r
-\r
- def add_fixer(self, fixer):\r
- """Reduces a fixer's pattern tree to a linear path and adds it\r
- to the matcher(a common Aho-Corasick automaton). The fixer is\r
- appended on the matching states and called when they are\r
- reached"""\r
- self.fixers.append(fixer)\r
- tree = reduce_tree(fixer.pattern_tree)\r
- linear = tree.get_linear_subpattern()\r
- match_nodes = self.add(linear, start=self.root)\r
- for match_node in match_nodes:\r
- match_node.fixers.append(fixer)\r
-\r
- def add(self, pattern, start):\r
- "Recursively adds a linear pattern to the AC automaton"\r
- #print("adding pattern", pattern, "to", start)\r
- if not pattern:\r
- #print("empty pattern")\r
- return [start]\r
- if isinstance(pattern[0], tuple):\r
- #alternatives\r
- #print("alternatives")\r
- match_nodes = []\r
- for alternative in pattern[0]:\r
- #add all alternatives, and add the rest of the pattern\r
- #to each end node\r
- end_nodes = self.add(alternative, start=start)\r
- for end in end_nodes:\r
- match_nodes.extend(self.add(pattern[1:], end))\r
- return match_nodes\r
- else:\r
- #single token\r
- #not last\r
- if pattern[0] not in start.transition_table:\r
- #transition did not exist, create new\r
- next_node = BMNode()\r
- start.transition_table[pattern[0]] = next_node\r
- else:\r
- #transition exists already, follow\r
- next_node = start.transition_table[pattern[0]]\r
-\r
- if pattern[1:]:\r
- end_nodes = self.add(pattern[1:], start=next_node)\r
- else:\r
- end_nodes = [next_node]\r
- return end_nodes\r
-\r
- def run(self, leaves):\r
- """The main interface with the bottom matcher. The tree is\r
- traversed from the bottom using the constructed\r
- automaton. Nodes are only checked once as the tree is\r
- retraversed. When the automaton fails, we give it one more\r
- shot(in case the above tree matches as a whole with the\r
- rejected leaf), then we break for the next leaf. There is the\r
- special case of multiple arguments(see code comments) where we\r
- recheck the nodes\r
-\r
- Args:\r
- The leaves of the AST tree to be matched\r
-\r
- Returns:\r
- A dictionary of node matches with fixers as the keys\r
- """\r
- current_ac_node = self.root\r
- results = defaultdict(list)\r
- for leaf in leaves:\r
- current_ast_node = leaf\r
- while current_ast_node:\r
- current_ast_node.was_checked = True\r
- for child in current_ast_node.children:\r
- # multiple statements, recheck\r
- if isinstance(child, pytree.Leaf) and child.value == u";":\r
- current_ast_node.was_checked = False\r
- break\r
- if current_ast_node.type == 1:\r
- #name\r
- node_token = current_ast_node.value\r
- else:\r
- node_token = current_ast_node.type\r
-\r
- if node_token in current_ac_node.transition_table:\r
- #token matches\r
- current_ac_node = current_ac_node.transition_table[node_token]\r
- for fixer in current_ac_node.fixers:\r
- if not fixer in results:\r
- results[fixer] = []\r
- results[fixer].append(current_ast_node)\r
-\r
- else:\r
- #matching failed, reset automaton\r
- current_ac_node = self.root\r
- if (current_ast_node.parent is not None\r
- and current_ast_node.parent.was_checked):\r
- #the rest of the tree upwards has been checked, next leaf\r
- break\r
-\r
- #recheck the rejected node once from the root\r
- if node_token in current_ac_node.transition_table:\r
- #token matches\r
- current_ac_node = current_ac_node.transition_table[node_token]\r
- for fixer in current_ac_node.fixers:\r
- if not fixer in results.keys():\r
- results[fixer] = []\r
- results[fixer].append(current_ast_node)\r
-\r
- current_ast_node = current_ast_node.parent\r
- return results\r
-\r
- def print_ac(self):\r
- "Prints a graphviz diagram of the BM automaton(for debugging)"\r
- print("digraph g{")\r
- def print_node(node):\r
- for subnode_key in node.transition_table.keys():\r
- subnode = node.transition_table[subnode_key]\r
- print("%d -> %d [label=%s] //%s" %\r
- (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))\r
- if subnode_key == 1:\r
- print(subnode.content)\r
- print_node(subnode)\r
- print_node(self.root)\r
- print("}")\r
-\r
-# taken from pytree.py for debugging; only used by print_ac\r
-_type_reprs = {}\r
-def type_repr(type_num):\r
- global _type_reprs\r
- if not _type_reprs:\r
- from .pygram import python_symbols\r
- # printing tokens is possible but not as useful\r
- # from .pgen2 import token // token.__dict__.items():\r
- for name, val in python_symbols.__dict__.items():\r
- if type(val) == int: _type_reprs[val] = name\r
- return _type_reprs.setdefault(type_num, type_num)\r