+++ /dev/null
-"Utility functions used by the btm_matcher module"\r
-\r
-from . import pytree\r
-from .pgen2 import grammar, token\r
-from .pygram import pattern_symbols, python_symbols\r
-\r
-syms = pattern_symbols\r
-pysyms = python_symbols\r
-tokens = grammar.opmap\r
-token_labels = token\r
-\r
-TYPE_ANY = -1\r
-TYPE_ALTERNATIVES = -2\r
-TYPE_GROUP = -3\r
-\r
-class MinNode(object):\r
- """This class serves as an intermediate representation of the\r
- pattern tree during the conversion to sets of leaf-to-root\r
- subpatterns"""\r
-\r
- def __init__(self, type=None, name=None):\r
- self.type = type\r
- self.name = name\r
- self.children = []\r
- self.leaf = False\r
- self.parent = None\r
- self.alternatives = []\r
- self.group = []\r
-\r
- def __repr__(self):\r
- return str(self.type) + ' ' + str(self.name)\r
-\r
- def leaf_to_root(self):\r
- """Internal method. Returns a characteristic path of the\r
- pattern tree. This method must be run for all leaves until the\r
- linear subpatterns are merged into a single"""\r
- node = self\r
- subp = []\r
- while node:\r
- if node.type == TYPE_ALTERNATIVES:\r
- node.alternatives.append(subp)\r
- if len(node.alternatives) == len(node.children):\r
- #last alternative\r
- subp = [tuple(node.alternatives)]\r
- node.alternatives = []\r
- node = node.parent\r
- continue\r
- else:\r
- node = node.parent\r
- subp = None\r
- break\r
-\r
- if node.type == TYPE_GROUP:\r
- node.group.append(subp)\r
- #probably should check the number of leaves\r
- if len(node.group) == len(node.children):\r
- subp = get_characteristic_subpattern(node.group)\r
- node.group = []\r
- node = node.parent\r
- continue\r
- else:\r
- node = node.parent\r
- subp = None\r
- break\r
-\r
- if node.type == token_labels.NAME and node.name:\r
- #in case of type=name, use the name instead\r
- subp.append(node.name)\r
- else:\r
- subp.append(node.type)\r
-\r
- node = node.parent\r
- return subp\r
-\r
- def get_linear_subpattern(self):\r
- """Drives the leaf_to_root method. The reason that\r
- leaf_to_root must be run multiple times is because we need to\r
- reject 'group' matches; for example the alternative form\r
- (a | b c) creates a group [b c] that needs to be matched. Since\r
- matching multiple linear patterns overcomes the automaton's\r
- capabilities, leaf_to_root merges each group into a single\r
- choice based on 'characteristic'ity,\r
-\r
- i.e. (a|b c) -> (a|b) if b more characteristic than c\r
-\r
- Returns: The most 'characteristic'(as defined by\r
- get_characteristic_subpattern) path for the compiled pattern\r
- tree.\r
- """\r
-\r
- for l in self.leaves():\r
- subp = l.leaf_to_root()\r
- if subp:\r
- return subp\r
-\r
- def leaves(self):\r
- "Generator that returns the leaves of the tree"\r
- for child in self.children:\r
- for x in child.leaves():\r
- yield x\r
- if not self.children:\r
- yield self\r
-\r
-def reduce_tree(node, parent=None):\r
- """\r
- Internal function. Reduces a compiled pattern tree to an\r
- intermediate representation suitable for feeding the\r
- automaton. This also trims off any optional pattern elements(like\r
- [a], a*).\r
- """\r
-\r
- new_node = None\r
- #switch on the node type\r
- if node.type == syms.Matcher:\r
- #skip\r
- node = node.children[0]\r
-\r
- if node.type == syms.Alternatives :\r
- #2 cases\r
- if len(node.children) <= 2:\r
- #just a single 'Alternative', skip this node\r
- new_node = reduce_tree(node.children[0], parent)\r
- else:\r
- #real alternatives\r
- new_node = MinNode(type=TYPE_ALTERNATIVES)\r
- #skip odd children('|' tokens)\r
- for child in node.children:\r
- if node.children.index(child)%2:\r
- continue\r
- reduced = reduce_tree(child, new_node)\r
- if reduced is not None:\r
- new_node.children.append(reduced)\r
- elif node.type == syms.Alternative:\r
- if len(node.children) > 1:\r
-\r
- new_node = MinNode(type=TYPE_GROUP)\r
- for child in node.children:\r
- reduced = reduce_tree(child, new_node)\r
- if reduced:\r
- new_node.children.append(reduced)\r
- if not new_node.children:\r
- # delete the group if all of the children were reduced to None\r
- new_node = None\r
-\r
- else:\r
- new_node = reduce_tree(node.children[0], parent)\r
-\r
- elif node.type == syms.Unit:\r
- if (isinstance(node.children[0], pytree.Leaf) and\r
- node.children[0].value == '('):\r
- #skip parentheses\r
- return reduce_tree(node.children[1], parent)\r
- if ((isinstance(node.children[0], pytree.Leaf) and\r
- node.children[0].value == '[')\r
- or\r
- (len(node.children)>1 and\r
- hasattr(node.children[1], "value") and\r
- node.children[1].value == '[')):\r
- #skip whole unit if its optional\r
- return None\r
-\r
- leaf = True\r
- details_node = None\r
- alternatives_node = None\r
- has_repeater = False\r
- repeater_node = None\r
- has_variable_name = False\r
-\r
- for child in node.children:\r
- if child.type == syms.Details:\r
- leaf = False\r
- details_node = child\r
- elif child.type == syms.Repeater:\r
- has_repeater = True\r
- repeater_node = child\r
- elif child.type == syms.Alternatives:\r
- alternatives_node = child\r
- if hasattr(child, 'value') and child.value == '=': # variable name\r
- has_variable_name = True\r
-\r
- #skip variable name\r
- if has_variable_name:\r
- #skip variable name, '='\r
- name_leaf = node.children[2]\r
- if hasattr(name_leaf, 'value') and name_leaf.value == '(':\r
- # skip parenthesis\r
- name_leaf = node.children[3]\r
- else:\r
- name_leaf = node.children[0]\r
-\r
- #set node type\r
- if name_leaf.type == token_labels.NAME:\r
- #(python) non-name or wildcard\r
- if name_leaf.value == 'any':\r
- new_node = MinNode(type=TYPE_ANY)\r
- else:\r
- if hasattr(token_labels, name_leaf.value):\r
- new_node = MinNode(type=getattr(token_labels, name_leaf.value))\r
- else:\r
- new_node = MinNode(type=getattr(pysyms, name_leaf.value))\r
-\r
- elif name_leaf.type == token_labels.STRING:\r
- #(python) name or character; remove the apostrophes from\r
- #the string value\r
- name = name_leaf.value.strip("'")\r
- if name in tokens:\r
- new_node = MinNode(type=tokens[name])\r
- else:\r
- new_node = MinNode(type=token_labels.NAME, name=name)\r
- elif name_leaf.type == syms.Alternatives:\r
- new_node = reduce_tree(alternatives_node, parent)\r
-\r
- #handle repeaters\r
- if has_repeater:\r
- if repeater_node.children[0].value == '*':\r
- #reduce to None\r
- new_node = None\r
- elif repeater_node.children[0].value == '+':\r
- #reduce to a single occurence i.e. do nothing\r
- pass\r
- else:\r
- #TODO: handle {min, max} repeaters\r
- raise NotImplementedError\r
- pass\r
-\r
- #add children\r
- if details_node and new_node is not None:\r
- for child in details_node.children[1:-1]:\r
- #skip '<', '>' markers\r
- reduced = reduce_tree(child, new_node)\r
- if reduced is not None:\r
- new_node.children.append(reduced)\r
- if new_node:\r
- new_node.parent = parent\r
- return new_node\r
-\r
-\r
-def get_characteristic_subpattern(subpatterns):\r
- """Picks the most characteristic from a list of linear patterns\r
- Current order used is:\r
- names > common_names > common_chars\r
- """\r
- if not isinstance(subpatterns, list):\r
- return subpatterns\r
- if len(subpatterns)==1:\r
- return subpatterns[0]\r
-\r
- # first pick out the ones containing variable names\r
- subpatterns_with_names = []\r
- subpatterns_with_common_names = []\r
- common_names = ['in', 'for', 'if' , 'not', 'None']\r
- subpatterns_with_common_chars = []\r
- common_chars = "[]().,:"\r
- for subpattern in subpatterns:\r
- if any(rec_test(subpattern, lambda x: type(x) is str)):\r
- if any(rec_test(subpattern,\r
- lambda x: isinstance(x, str) and x in common_chars)):\r
- subpatterns_with_common_chars.append(subpattern)\r
- elif any(rec_test(subpattern,\r
- lambda x: isinstance(x, str) and x in common_names)):\r
- subpatterns_with_common_names.append(subpattern)\r
-\r
- else:\r
- subpatterns_with_names.append(subpattern)\r
-\r
- if subpatterns_with_names:\r
- subpatterns = subpatterns_with_names\r
- elif subpatterns_with_common_names:\r
- subpatterns = subpatterns_with_common_names\r
- elif subpatterns_with_common_chars:\r
- subpatterns = subpatterns_with_common_chars\r
- # of the remaining subpatterns pick out the longest one\r
- return max(subpatterns, key=len)\r
-\r
-def rec_test(sequence, test_func):\r
- """Tests test_func on all items of sequence and items of included\r
- sub-iterables"""\r
- for x in sequence:\r
- if isinstance(x, (list, tuple)):\r
- for y in rec_test(x, test_func):\r
- yield y\r
- else:\r
- yield test_func(x)\r