1 "Utility functions used by the btm_matcher module"
4 from .pgen2
import grammar
, token
5 from .pygram
import pattern_symbols
, python_symbols
8 pysyms
= python_symbols
13 TYPE_ALTERNATIVES
= -2
16 class MinNode(object):
17 """This class serves as an intermediate representation of the
18 pattern tree during the conversion to sets of leaf-to-root
21 def __init__(self
, type=None, name
=None):
27 self
.alternatives
= []
31 return str(self
.type) + ' ' + str(self
.name
)
33 def leaf_to_root(self
):
34 """Internal method. Returns a characteristic path of the
35 pattern tree. This method must be run for all leaves until the
36 linear subpatterns are merged into a single"""
40 if node
.type == TYPE_ALTERNATIVES
:
41 node
.alternatives
.append(subp
)
42 if len(node
.alternatives
) == len(node
.children
):
44 subp
= [tuple(node
.alternatives
)]
45 node
.alternatives
= []
53 if node
.type == TYPE_GROUP
:
54 node
.group
.append(subp
)
55 #probably should check the number of leaves
56 if len(node
.group
) == len(node
.children
):
57 subp
= get_characteristic_subpattern(node
.group
)
66 if node
.type == token_labels
.NAME
and node
.name
:
67 #in case of type=name, use the name instead
68 subp
.append(node
.name
)
70 subp
.append(node
.type)
75 def get_linear_subpattern(self
):
76 """Drives the leaf_to_root method. The reason that
77 leaf_to_root must be run multiple times is because we need to
78 reject 'group' matches; for example the alternative form
79 (a | b c) creates a group [b c] that needs to be matched. Since
80 matching multiple linear patterns overcomes the automaton's
81 capabilities, leaf_to_root merges each group into a single
82 choice based on 'characteristic'ity,
84 i.e. (a|b c) -> (a|b) if b more characteristic than c
86 Returns: The most 'characteristic'(as defined by
87 get_characteristic_subpattern) path for the compiled pattern
91 for l
in self
.leaves():
92 subp
= l
.leaf_to_root()
97 "Generator that returns the leaves of the tree"
98 for child
in self
.children
:
99 for x
in child
.leaves():
101 if not self
.children
:
104 def reduce_tree(node
, parent
=None):
106 Internal function. Reduces a compiled pattern tree to an
107 intermediate representation suitable for feeding the
108 automaton. This also trims off any optional pattern elements(like
113 #switch on the node type
114 if node
.type == syms
.Matcher
:
116 node
= node
.children
[0]
118 if node
.type == syms
.Alternatives
:
120 if len(node
.children
) <= 2:
121 #just a single 'Alternative', skip this node
122 new_node
= reduce_tree(node
.children
[0], parent
)
125 new_node
= MinNode(type=TYPE_ALTERNATIVES
)
126 #skip odd children('|' tokens)
127 for child
in node
.children
:
128 if node
.children
.index(child
)%2:
130 reduced
= reduce_tree(child
, new_node
)
131 if reduced
is not None:
132 new_node
.children
.append(reduced
)
133 elif node
.type == syms
.Alternative
:
134 if len(node
.children
) > 1:
136 new_node
= MinNode(type=TYPE_GROUP
)
137 for child
in node
.children
:
138 reduced
= reduce_tree(child
, new_node
)
140 new_node
.children
.append(reduced
)
141 if not new_node
.children
:
142 # delete the group if all of the children were reduced to None
146 new_node
= reduce_tree(node
.children
[0], parent
)
148 elif node
.type == syms
.Unit
:
149 if (isinstance(node
.children
[0], pytree
.Leaf
) and
150 node
.children
[0].value
== '('):
152 return reduce_tree(node
.children
[1], parent
)
153 if ((isinstance(node
.children
[0], pytree
.Leaf
) and
154 node
.children
[0].value
== '[')
156 (len(node
.children
)>1 and
157 hasattr(node
.children
[1], "value") and
158 node
.children
[1].value
== '[')):
159 #skip whole unit if its optional
164 alternatives_node
= None
167 has_variable_name
= False
169 for child
in node
.children
:
170 if child
.type == syms
.Details
:
173 elif child
.type == syms
.Repeater
:
175 repeater_node
= child
176 elif child
.type == syms
.Alternatives
:
177 alternatives_node
= child
178 if hasattr(child
, 'value') and child
.value
== '=': # variable name
179 has_variable_name
= True
182 if has_variable_name
:
183 #skip variable name, '='
184 name_leaf
= node
.children
[2]
185 if hasattr(name_leaf
, 'value') and name_leaf
.value
== '(':
187 name_leaf
= node
.children
[3]
189 name_leaf
= node
.children
[0]
192 if name_leaf
.type == token_labels
.NAME
:
193 #(python) non-name or wildcard
194 if name_leaf
.value
== 'any':
195 new_node
= MinNode(type=TYPE_ANY
)
197 if hasattr(token_labels
, name_leaf
.value
):
198 new_node
= MinNode(type=getattr(token_labels
, name_leaf
.value
))
200 new_node
= MinNode(type=getattr(pysyms
, name_leaf
.value
))
202 elif name_leaf
.type == token_labels
.STRING
:
203 #(python) name or character; remove the apostrophes from
205 name
= name_leaf
.value
.strip("'")
207 new_node
= MinNode(type=tokens
[name
])
209 new_node
= MinNode(type=token_labels
.NAME
, name
=name
)
210 elif name_leaf
.type == syms
.Alternatives
:
211 new_node
= reduce_tree(alternatives_node
, parent
)
215 if repeater_node
.children
[0].value
== '*':
218 elif repeater_node
.children
[0].value
== '+':
219 #reduce to a single occurence i.e. do nothing
222 #TODO: handle {min, max} repeaters
223 raise NotImplementedError
227 if details_node
and new_node
is not None:
228 for child
in details_node
.children
[1:-1]:
229 #skip '<', '>' markers
230 reduced
= reduce_tree(child
, new_node
)
231 if reduced
is not None:
232 new_node
.children
.append(reduced
)
234 new_node
.parent
= parent
238 def get_characteristic_subpattern(subpatterns
):
239 """Picks the most characteristic from a list of linear patterns
240 Current order used is:
241 names > common_names > common_chars
243 if not isinstance(subpatterns
, list):
245 if len(subpatterns
)==1:
246 return subpatterns
[0]
248 # first pick out the ones containing variable names
249 subpatterns_with_names
= []
250 subpatterns_with_common_names
= []
251 common_names
= ['in', 'for', 'if' , 'not', 'None']
252 subpatterns_with_common_chars
= []
253 common_chars
= "[]().,:"
254 for subpattern
in subpatterns
:
255 if any(rec_test(subpattern
, lambda x
: type(x
) is str)):
256 if any(rec_test(subpattern
,
257 lambda x
: isinstance(x
, str) and x
in common_chars
)):
258 subpatterns_with_common_chars
.append(subpattern
)
259 elif any(rec_test(subpattern
,
260 lambda x
: isinstance(x
, str) and x
in common_names
)):
261 subpatterns_with_common_names
.append(subpattern
)
264 subpatterns_with_names
.append(subpattern
)
266 if subpatterns_with_names
:
267 subpatterns
= subpatterns_with_names
268 elif subpatterns_with_common_names
:
269 subpatterns
= subpatterns_with_common_names
270 elif subpatterns_with_common_chars
:
271 subpatterns
= subpatterns_with_common_chars
272 # of the remaining subpatterns pick out the longest one
273 return max(subpatterns
, key
=len)
275 def rec_test(sequence
, test_func
):
276 """Tests test_func on all items of sequence and items of included
279 if isinstance(x
, (list, tuple)):
280 for y
in rec_test(x
, test_func
):