+++ /dev/null
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.\r
-# Licensed to PSF under a Contributor Agreement.\r
-\r
-"""Convert graminit.[ch] spit out by pgen to Python code.\r
-\r
-Pgen is the Python parser generator. It is useful to quickly create a\r
-parser from a grammar file in Python's grammar notation. But I don't\r
-want my parsers to be written in C (yet), so I'm translating the\r
-parsing tables to Python data structures and writing a Python parse\r
-engine.\r
-\r
-Note that the token numbers are constants determined by the standard\r
-Python tokenizer. The standard token module defines these numbers and\r
-their names (the names are not used much). The token numbers are\r
-hardcoded into the Python tokenizer and into pgen. A Python\r
-implementation of the Python tokenizer is also available, in the\r
-standard tokenize module.\r
-\r
-On the other hand, symbol numbers (representing the grammar's\r
-non-terminals) are assigned by pgen based on the actual grammar\r
-input.\r
-\r
-Note: this module is pretty much obsolete; the pgen module generates\r
-equivalent grammar tables directly from the Grammar.txt input file\r
-without having to invoke the Python pgen C program.\r
-\r
-"""\r
-\r
-# Python imports\r
-import re\r
-\r
-# Local imports\r
-from pgen2 import grammar, token\r
-\r
-\r
class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c.  The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h and graminit_c are the paths of the two files.  The
        header is read first because the .c parser cross-checks the dfa
        table against the symbol tables built from the header.
        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen.  (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers.  We build two tables
        mapping the numbers to names and back (self.symbol2number and
        self.number2symbol).

        Blank lines are skipped silently; any other line that is not a
        "#define NAME NUMBER" is reported on stdout and skipped.

        Returns False (after printing a message) if the file cannot be
        opened, True otherwise.  Duplicate names or numbers trigger an
        AssertionError.
        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        with f:  # make sure the handle is closed, even on assertion failure
            for lineno, line in enumerate(f, 1):
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if mo is None:
                    # Only complain about non-blank garbage; a blank line
                    # must not fall through to mo.groups() below.
                    if line.strip():
                        print("%s(%s): can't parse %s" % (filename, lineno,
                                                          line.strip()))
                    continue
                symbol, number = mo.groups()
                number = int(number)
                assert symbol not in self.symbol2number
                assert number not in self.number2symbol
                self.symbol2number[symbol] = number
                self.number2symbol[number] = symbol
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen.  (Internal)

        The file looks as follows.  The first two lines are always this:

        #include "pgenheaders.h"
        #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
          static arc arcs_<n>_<m>[<k>] = {
                  {<i>, <j>},
                  ...
          };
        - followed by a state array, of the form:
          static state states_<s>[<t>] = {
                  {<k>, arcs_<n>_<m>},
                  ...
          };

        Sets self.states, self.dfas, self.labels and self.start, in that
        order, as each block is parsed.

        Returns False (after printing a message) if the file cannot be
        opened, True once the whole file has been parsed.  Any deviation
        from the expected layout raises AssertionError carrying
        (lineno, line); a file that ends too early raises StopIteration.
        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        with f:  # make sure the handle is closed, even on assertion failure
            # Every read goes through this one iterator, so each helper
            # resumes exactly where the previous one stopped and the line
            # number always travels together with the line.
            lines = enumerate(f, 1)

            # Expect the two #include lines
            self._expect_line(lines, '#include "pgenheaders.h"\n')
            self._expect_line(lines, '#include "grammar.h"\n')

            lineno, line = self._parse_states(lines)
            self._parse_dfas(lines, lineno, line)
            self._parse_labels(lines)
            self._parse_grammar_struct(lines)

            # Nothing may follow the grammar struct.
            try:
                lineno, line = next(lines)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)
        return True

    @staticmethod
    def _expect_line(lines, expected):
        """Consume one line and assert that it equals expected exactly."""
        lineno, line = next(lines)
        assert line == expected, (lineno, line)

    def _parse_states(self, lines):
        """Parse the arc/state definitions and store them in self.states.

        Returns the (lineno, line) pair of the first line that does not
        belong to a state definition; it has already been consumed, so
        the caller must hand it on to the dfa parser.
        """
        lineno, line = next(lines)
        allarcs = {}    # (n, m) -> list of (i, j) arcs
        states = []
        while line.startswith("static arc "):
            # One or more arc arrays ...
            while line.startswith("static arc "):
                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
                              line)
                assert mo, (lineno, line)
                n, m, k = map(int, mo.groups())
                arcs = []
                for _ in range(k):
                    lineno, line = next(lines)
                    mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                    assert mo, (lineno, line)
                    i, j = map(int, mo.groups())
                    arcs.append((i, j))
                lineno, line = next(lines)
                assert line == "};\n", (lineno, line)
                allarcs[(n, m)] = arcs
                lineno, line = next(lines)
            # ... followed by the state array that refers to them.
            mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            s, t = map(int, mo.groups())
            # State arrays must appear in order: states_0, states_1, ...
            assert s == len(states), (lineno, line)
            state = []
            for _ in range(t):
                lineno, line = next(lines)
                mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                assert mo, (lineno, line)
                k, n, m = map(int, mo.groups())
                arcs = allarcs[n, m]
                assert k == len(arcs), (lineno, line)
                state.append(arcs)
            states.append(state)
            lineno, line = next(lines)
            assert line == "};\n", (lineno, line)
            lineno, line = next(lines)
        self.states = states
        return lineno, line

    def _parse_dfas(self, lines, lineno, line):
        """Parse the dfa table and store it in self.dfas.

        (lineno, line) is the already-consumed table header.  Each dfa
        takes two lines: its description, then its "first" bitset
        written as a C string of octal escapes.  self.dfas maps the
        symbol number to (state list, dict of set bit indices).
        """
        dfas = {}
        mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
        assert mo, (lineno, line)
        ndfas = int(mo.group(1))
        for _ in range(ndfas):
            lineno, line = next(lines)
            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                          line)
            assert mo, (lineno, line)
            symbol = mo.group(2)
            number, x, y, z = map(int, mo.group(1, 3, 4, 5))
            # The dfa must agree with the tables built from graminit.h.
            assert self.symbol2number[symbol] == number, (lineno, line)
            assert self.number2symbol[number] == symbol, (lineno, line)
            assert x == 0, (lineno, line)
            state = self.states[z]
            assert y == len(state), (lineno, line)
            lineno, line = next(lines)
            mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
            assert mo, (lineno, line)
            first = {}
            # NOTE: eval() on file content.  The regex above only admits
            # a double-quoted run of \ddd escapes, so nothing but a
            # string literal can reach it.
            rawbitset = eval(mo.group(1))
            for byteno, c in enumerate(rawbitset):
                byte = ord(c)
                for bit in range(8):
                    if byte & (1 << bit):
                        first[byteno*8 + bit] = 1
            dfas[number] = (state, first)
        lineno, line = next(lines)
        assert line == "};\n", (lineno, line)
        self.dfas = dfas

    def _parse_labels(self, lines):
        """Parse the label table and store it in self.labels.

        self.labels becomes a list of (number, string-or-None) pairs; a
        literal 0 in the second field of the C table is stored as None.
        """
        labels = []
        lineno, line = next(lines)
        mo = re.match(r"static label labels\[(\d+)\] = {$", line)
        assert mo, (lineno, line)
        nlabels = int(mo.group(1))
        for _ in range(nlabels):
            lineno, line = next(lines)
            mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
            assert mo, (lineno, line)
            x, y = mo.groups()
            x = int(x)
            if y == "0":
                y = None
            else:
                # The regex guarantees y is "\w+" in double quotes, so
                # dropping the quotes gives what eval(y) would, without
                # evaluating file content.
                y = y[1:-1]
            labels.append((x, y))
        lineno, line = next(lines)
        assert line == "};\n", (lineno, line)
        self.labels = labels

    def _parse_grammar_struct(self, lines):
        """Parse the closing grammar struct and set self.start.

        The dfa and label counts recorded in the struct are checked
        against the tables parsed earlier, and the start symbol must be
        a number known from graminit.h.
        """
        lineno, line = next(lines)
        assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
        lineno, line = next(lines)
        mo = re.match(r"\s+(\d+),$", line)
        assert mo, (lineno, line)
        ndfas = int(mo.group(1))
        assert ndfas == len(self.dfas)
        lineno, line = next(lines)
        assert line == "\tdfas,\n", (lineno, line)
        lineno, line = next(lines)
        mo = re.match(r"\s+{(\d+), labels},$", line)
        assert mo, (lineno, line)
        nlabels = int(mo.group(1))
        assert nlabels == len(self.labels), (lineno, line)
        lineno, line = next(lines)
        mo = re.match(r"\s+(\d+)$", line)
        assert mo, (lineno, line)
        start = int(mo.group(1))
        assert start in self.number2symbol, (lineno, line)
        self.start = start
        lineno, line = next(lines)
        assert line == "};\n", (lineno, line)

    def finish_off(self):
        """Create additional useful structures.  (Internal).

        Builds self.keywords and self.tokens from self.labels.  A label
        of type token.NAME that carries a string is a keyword; a label
        without a string is indexed by its token type.  Labels with a
        string but another type go into neither table.
        """
        self.keywords = {}  # map from keyword strings to arc labels
        self.tokens = {}    # map from numeric token values to arc labels
        for ilabel, (toktype, value) in enumerate(self.labels):
            if value is None:
                self.tokens[toktype] = ilabel
            elif toktype == token.NAME:
                self.keywords[value] = ilabel