]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | """A bottom-up tree matching algorithm implementation meant to speed\r |
2 | up 2to3's matching process. After the tree patterns are reduced to\r | |
3 | their rarest linear path, a linear Aho-Corasick automaton is\r | |
4 | created. The linear automaton traverses the linear paths from the\r | |
5 | leaves to the root of the AST and returns a set of nodes for further\r | |
6 | matching. This reduces significantly the number of candidate nodes."""\r | |
7 | \r | |
8 | __author__ = "George Boutsioukis <gboutsioukis@gmail.com>"\r | |
9 | \r | |
10 | import logging\r | |
11 | import itertools\r | |
12 | from collections import defaultdict\r | |
13 | \r | |
14 | from . import pytree\r | |
15 | from .btm_utils import reduce_tree\r | |
16 | \r | |
17 | class BMNode(object):\r | |
18 | """Class for a node of the Aho-Corasick automaton used in matching"""\r | |
19 | count = itertools.count()\r | |
20 | def __init__(self):\r | |
21 | self.transition_table = {}\r | |
22 | self.fixers = []\r | |
23 | self.id = next(BMNode.count)\r | |
24 | self.content = ''\r | |
25 | \r | |
26 | class BottomMatcher(object):\r | |
27 | """The main matcher class. After instantiating the patterns should\r | |
28 | be added using the add_fixer method"""\r | |
29 | \r | |
30 | def __init__(self):\r | |
31 | self.match = set()\r | |
32 | self.root = BMNode()\r | |
33 | self.nodes = [self.root]\r | |
34 | self.fixers = []\r | |
35 | self.logger = logging.getLogger("RefactoringTool")\r | |
36 | \r | |
37 | def add_fixer(self, fixer):\r | |
38 | """Reduces a fixer's pattern tree to a linear path and adds it\r | |
39 | to the matcher(a common Aho-Corasick automaton). The fixer is\r | |
40 | appended on the matching states and called when they are\r | |
41 | reached"""\r | |
42 | self.fixers.append(fixer)\r | |
43 | tree = reduce_tree(fixer.pattern_tree)\r | |
44 | linear = tree.get_linear_subpattern()\r | |
45 | match_nodes = self.add(linear, start=self.root)\r | |
46 | for match_node in match_nodes:\r | |
47 | match_node.fixers.append(fixer)\r | |
48 | \r | |
49 | def add(self, pattern, start):\r | |
50 | "Recursively adds a linear pattern to the AC automaton"\r | |
51 | #print("adding pattern", pattern, "to", start)\r | |
52 | if not pattern:\r | |
53 | #print("empty pattern")\r | |
54 | return [start]\r | |
55 | if isinstance(pattern[0], tuple):\r | |
56 | #alternatives\r | |
57 | #print("alternatives")\r | |
58 | match_nodes = []\r | |
59 | for alternative in pattern[0]:\r | |
60 | #add all alternatives, and add the rest of the pattern\r | |
61 | #to each end node\r | |
62 | end_nodes = self.add(alternative, start=start)\r | |
63 | for end in end_nodes:\r | |
64 | match_nodes.extend(self.add(pattern[1:], end))\r | |
65 | return match_nodes\r | |
66 | else:\r | |
67 | #single token\r | |
68 | #not last\r | |
69 | if pattern[0] not in start.transition_table:\r | |
70 | #transition did not exist, create new\r | |
71 | next_node = BMNode()\r | |
72 | start.transition_table[pattern[0]] = next_node\r | |
73 | else:\r | |
74 | #transition exists already, follow\r | |
75 | next_node = start.transition_table[pattern[0]]\r | |
76 | \r | |
77 | if pattern[1:]:\r | |
78 | end_nodes = self.add(pattern[1:], start=next_node)\r | |
79 | else:\r | |
80 | end_nodes = [next_node]\r | |
81 | return end_nodes\r | |
82 | \r | |
83 | def run(self, leaves):\r | |
84 | """The main interface with the bottom matcher. The tree is\r | |
85 | traversed from the bottom using the constructed\r | |
86 | automaton. Nodes are only checked once as the tree is\r | |
87 | retraversed. When the automaton fails, we give it one more\r | |
88 | shot(in case the above tree matches as a whole with the\r | |
89 | rejected leaf), then we break for the next leaf. There is the\r | |
90 | special case of multiple arguments(see code comments) where we\r | |
91 | recheck the nodes\r | |
92 | \r | |
93 | Args:\r | |
94 | The leaves of the AST tree to be matched\r | |
95 | \r | |
96 | Returns:\r | |
97 | A dictionary of node matches with fixers as the keys\r | |
98 | """\r | |
99 | current_ac_node = self.root\r | |
100 | results = defaultdict(list)\r | |
101 | for leaf in leaves:\r | |
102 | current_ast_node = leaf\r | |
103 | while current_ast_node:\r | |
104 | current_ast_node.was_checked = True\r | |
105 | for child in current_ast_node.children:\r | |
106 | # multiple statements, recheck\r | |
107 | if isinstance(child, pytree.Leaf) and child.value == u";":\r | |
108 | current_ast_node.was_checked = False\r | |
109 | break\r | |
110 | if current_ast_node.type == 1:\r | |
111 | #name\r | |
112 | node_token = current_ast_node.value\r | |
113 | else:\r | |
114 | node_token = current_ast_node.type\r | |
115 | \r | |
116 | if node_token in current_ac_node.transition_table:\r | |
117 | #token matches\r | |
118 | current_ac_node = current_ac_node.transition_table[node_token]\r | |
119 | for fixer in current_ac_node.fixers:\r | |
120 | if not fixer in results:\r | |
121 | results[fixer] = []\r | |
122 | results[fixer].append(current_ast_node)\r | |
123 | \r | |
124 | else:\r | |
125 | #matching failed, reset automaton\r | |
126 | current_ac_node = self.root\r | |
127 | if (current_ast_node.parent is not None\r | |
128 | and current_ast_node.parent.was_checked):\r | |
129 | #the rest of the tree upwards has been checked, next leaf\r | |
130 | break\r | |
131 | \r | |
132 | #recheck the rejected node once from the root\r | |
133 | if node_token in current_ac_node.transition_table:\r | |
134 | #token matches\r | |
135 | current_ac_node = current_ac_node.transition_table[node_token]\r | |
136 | for fixer in current_ac_node.fixers:\r | |
137 | if not fixer in results.keys():\r | |
138 | results[fixer] = []\r | |
139 | results[fixer].append(current_ast_node)\r | |
140 | \r | |
141 | current_ast_node = current_ast_node.parent\r | |
142 | return results\r | |
143 | \r | |
144 | def print_ac(self):\r | |
145 | "Prints a graphviz diagram of the BM automaton(for debugging)"\r | |
146 | print("digraph g{")\r | |
147 | def print_node(node):\r | |
148 | for subnode_key in node.transition_table.keys():\r | |
149 | subnode = node.transition_table[subnode_key]\r | |
150 | print("%d -> %d [label=%s] //%s" %\r | |
151 | (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))\r | |
152 | if subnode_key == 1:\r | |
153 | print(subnode.content)\r | |
154 | print_node(subnode)\r | |
155 | print_node(self.root)\r | |
156 | print("}")\r | |
157 | \r | |
158 | # taken from pytree.py for debugging; only used by print_ac\r | |
159 | _type_reprs = {}\r | |
160 | def type_repr(type_num):\r | |
161 | global _type_reprs\r | |
162 | if not _type_reprs:\r | |
163 | from .pygram import python_symbols\r | |
164 | # printing tokens is possible but not as useful\r | |
165 | # from .pgen2 import token // token.__dict__.items():\r | |
166 | for name, val in python_symbols.__dict__.items():\r | |
167 | if type(val) == int: _type_reprs[val] = name\r | |
168 | return _type_reprs.setdefault(type_num, type_num)\r |