+++ /dev/null
-# Copyright 2006 Google, Inc. All Rights Reserved.\r
-# Licensed to PSF under a Contributor Agreement.\r
-\r
-"""Pattern compiler.\r
-\r
-The grammar is taken from PatternGrammar.txt.\r
-\r
-The compiler compiles a pattern to a pytree.*Pattern instance.\r
-"""\r
-\r
-__author__ = "Guido van Rossum <guido@python.org>"\r
-\r
-# Python imports\r
-import os\r
-import StringIO\r
-\r
-# Fairly local imports\r
-from .pgen2 import driver, literals, token, tokenize, parse, grammar\r
-\r
-# Really local imports\r
-from . import pytree\r
-from . import pygram\r
-\r
# Path of the pattern grammar file, which sits in this module's directory.
_PATTERN_GRAMMAR_FILE = os.path.join(
    os.path.dirname(__file__), "PatternGrammar.txt"
)
-\r
-\r
class PatternSyntaxError(Exception):
    """Raised when a pattern string cannot be compiled."""
-\r
-\r
def tokenize_wrapper(input):
    """Yield the tokens of a pattern string, minus layout tokens.

    NEWLINE, INDENT and DEDENT tokens are dropped; every other 5-tuple
    produced by the tokenizer is passed through unchanged.
    """
    ignored = set((token.NEWLINE, token.INDENT, token.DEDENT))
    readline = StringIO.StringIO(input).readline
    for tok in tokenize.generate_tokens(readline):
        tok_type, _value, _start, _end, _line_text = tok
        if tok_type in ignored:
            continue
        yield tok
-\r
-\r
class PatternCompiler(object):
    """Compiles pattern strings into pytree.*Pattern objects."""

    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
        """Initializer.

        Takes an optional alternative filename for the pattern grammar.
        """
        self.grammar = driver.load_grammar(grammar_file)
        self.syms = pygram.Symbols(self.grammar)
        self.pygrammar = pygram.python_grammar
        self.pysyms = pygram.python_symbols
        self.driver = driver.Driver(self.grammar, convert=pattern_convert)

    def compile_pattern(self, input, debug=False, with_tree=False):
        """Compiles a pattern string to a nested pytree.*Pattern object.

        Args:
            input: the pattern source text.
            debug: passed through to the driver's parse_tokens().
            with_tree: when true, return a (pattern, parse tree) pair
                instead of just the pattern.

        Raises:
            PatternSyntaxError: if the driver raises parse.ParseError, or
                the pattern names an invalid token or symbol.
        """
        tokens = tokenize_wrapper(input)
        try:
            root = self.driver.parse_tokens(tokens, debug=debug)
        except parse.ParseError as e:
            raise PatternSyntaxError(str(e))
        pattern = self.compile_node(root)
        if with_tree:
            return pattern, root
        return pattern

    def compile_node(self, node):
        """Compiles a node, recursively.

        This is one big switch on the node type.
        """
        # XXX Optimize certain Wildcard-containing-Wildcard patterns
        # that can be merged
        if node.type == self.syms.Matcher:
            node = node.children[0]  # Avoid unneeded recursion

        if node.type == self.syms.Alternatives:
            # Skip the odd children since they are just '|' tokens
            alts = [self.compile_node(ch) for ch in node.children[::2]]
            if len(alts) == 1:
                return alts[0]
            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.Alternative:
            units = [self.compile_node(ch) for ch in node.children]
            if len(units) == 1:
                return units[0]
            p = pytree.WildcardPattern([units], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.NegatedUnit:
            # children[0] is skipped; the rest is the unit being negated.
            pattern = self.compile_basic(node.children[1:])
            p = pytree.NegatedPattern(pattern)
            return p.optimize()

        assert node.type == self.syms.Unit

        # Peel off an optional leading "name =" and an optional trailing
        # Repeater; whatever remains is handed to compile_basic().
        name = None
        nodes = node.children
        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
            name = nodes[0].value
            nodes = nodes[2:]
        repeat = None
        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
            repeat = nodes[-1]
            nodes = nodes[:-1]

        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
        pattern = self.compile_basic(nodes, repeat)

        if repeat is not None:
            assert repeat.type == self.syms.Repeater
            children = repeat.children
            child = children[0]
            if child.type == token.STAR:
                lo = 0
                hi = pytree.HUGE
            elif child.type == token.PLUS:
                lo = 1
                hi = pytree.HUGE
            elif child.type == token.LBRACE:
                # Either {n} (3 children) or {n,m} (5 children).
                assert children[-1].type == token.RBRACE
                assert len(children) in (3, 5)
                lo = hi = self.get_int(children[1])
                if len(children) == 5:
                    hi = self.get_int(children[3])
            else:
                assert False
            # A repeat count of exactly one needs no wildcard wrapper.
            if lo != 1 or hi != 1:
                pattern = pattern.optimize()
                pattern = pytree.WildcardPattern([[pattern]], min=lo, max=hi)

        if name is not None:
            pattern.name = name
        return pattern.optimize()

    def compile_basic(self, nodes, repeat=None):
        """Compiles the core of a unit: STRING | NAME [Details] | (...) | [...]

        Args:
            nodes: the unit's child nodes with any "name =" prefix and
                trailing Repeater already removed; must be non-empty.
            repeat: the Repeater node that followed the unit, if any.  It
                is only used to assert that a [...] group has none.

        Raises:
            PatternSyntaxError: for an unknown upper-case token name, a
                token name followed by details, or an invalid symbol name
                (including any lower-case name starting with "_").
        """
        assert len(nodes) >= 1
        node = nodes[0]
        if node.type == token.STRING:
            value = unicode(literals.evalString(node.value))
            return pytree.LeafPattern(_type_of_literal(value), value)
        elif node.type == token.NAME:
            value = node.value
            if value.isupper():
                if value not in TOKEN_MAP:
                    raise PatternSyntaxError("Invalid token: %r" % value)
                if nodes[1:]:
                    raise PatternSyntaxError("Can't have details for token")
                return pytree.LeafPattern(TOKEN_MAP[value])
            else:
                if value == "any":
                    node_type = None
                else:
                    # Names starting with "_" are never looked up on the
                    # symbols object.  They used to leave the type unbound
                    # and crash with UnboundLocalError; treat them as
                    # invalid symbols instead.
                    node_type = None
                    if not value.startswith("_"):
                        node_type = getattr(self.pysyms, value, None)
                    if node_type is None:
                        raise PatternSyntaxError("Invalid symbol: %r" % value)
                if nodes[1:]:  # Details present
                    content = [self.compile_node(nodes[1].children[1])]
                else:
                    content = None
                return pytree.NodePattern(node_type, content)
        elif node.value == "(":
            return self.compile_node(nodes[1])
        elif node.value == "[":
            assert repeat is None
            subpattern = self.compile_node(nodes[1])
            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
        assert False, node

    def get_int(self, node):
        """Returns the integer value of a NUMBER token node."""
        assert node.type == token.NUMBER
        return int(node.value)
-\r
-\r
# Token names usable in patterns, mapped to the type value given to the
# resulting LeafPattern ("TOKEN" maps to None).
TOKEN_MAP = dict(
    NAME=token.NAME,
    STRING=token.STRING,
    NUMBER=token.NUMBER,
    TOKEN=None,
)
-\r
-\r
def _type_of_literal(value):
    """Return the token type to use for a string literal in a pattern.

    Args:
        value: the evaluated text of the literal.

    Returns:
        token.NAME when the text starts with a letter, the grammar.opmap
        entry when the text is a key of that map, and None otherwise
        (including for an empty literal).
    """
    if not value:
        # An empty literal has no first character to classify; give the
        # same "unknown" answer as for any other unrecognised text
        # instead of failing with IndexError.
        return None
    if value[0].isalpha():
        return token.NAME
    if value in grammar.opmap:
        return grammar.opmap[value]
    return None
-\r
-\r
def pattern_convert(grammar, raw_node_info):
    """Build a pytree Node or Leaf from the parser's raw node tuple.

    A Leaf is produced only when there are no children and the type is
    not one of the grammar's symbol numbers; anything else becomes a Node.
    """
    node_type, value, context, children = raw_node_info
    is_leaf = not children and node_type not in grammar.number2symbol
    if is_leaf:
        return pytree.Leaf(node_type, value, context=context)
    return pytree.Node(node_type, children, context=context)
-\r
-\r
def compile_pattern(pattern):
    """Compile a pattern string with a newly created default PatternCompiler."""
    compiler = PatternCompiler()
    return compiler.compile_pattern(pattern)