]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.\r |
2 | # Licensed to PSF under a Contributor Agreement.\r | |
3 | \r | |
4 | """Convert graminit.[ch] spit out by pgen to Python code.\r | |
5 | \r | |
6 | Pgen is the Python parser generator. It is useful to quickly create a\r | |
7 | parser from a grammar file in Python's grammar notation. But I don't\r | |
8 | want my parsers to be written in C (yet), so I'm translating the\r | |
9 | parsing tables to Python data structures and writing a Python parse\r | |
10 | engine.\r | |
11 | \r | |
12 | Note that the token numbers are constants determined by the standard\r | |
13 | Python tokenizer. The standard token module defines these numbers and\r | |
14 | their names (the names are not used much). The token numbers are\r | |
15 | hardcoded into the Python tokenizer and into pgen. A Python\r | |
16 | implementation of the Python tokenizer is also available, in the\r | |
17 | standard tokenize module.\r | |
18 | \r | |
19 | On the other hand, symbol numbers (representing the grammar's\r | |
20 | non-terminals) are assigned by pgen based on the actual grammar\r | |
21 | input.\r | |
22 | \r | |
23 | Note: this module is pretty much obsolete; the pgen module generates\r | |
24 | equivalent grammar tables directly from the Grammar.txt input file\r | |
25 | without having to invoke the Python pgen C program.\r | |
26 | \r | |
27 | """\r | |
28 | \r | |
29 | # Python imports\r | |
30 | import re\r | |
31 | \r | |
32 | # Local imports\r | |
33 | from pgen2 import grammar, token\r | |
34 | \r | |
35 | \r | |
class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c.  The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h -- path to the C header file (#define lines).
        graminit_c -- path to the C source file (state/dfa/label tables).
        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen.  (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers.  We build two tables
        mapping the numbers to names and back.

        Returns True on success, False if the file could not be opened.
        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        lineno = 0
        with f:
            for line in f:
                lineno += 1
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if mo:
                    symbol, number = mo.groups()
                    number = int(number)
                    # Symbol numbers must be a bijection; duplicates would
                    # silently corrupt the tables.
                    assert symbol not in self.symbol2number
                    assert number not in self.number2symbol
                    self.symbol2number[symbol] = number
                    self.number2symbol[number] = symbol
                elif line.strip():
                    # Non-blank line that is not a #define: report it but
                    # keep going.  (Blank lines are skipped silently; the
                    # previous code crashed on them by calling mo.groups()
                    # with mo == None.)
                    print("%s(%s): can't parse %s" % (filename, lineno,
                                                      line.strip()))
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen.  (Internal)

        The file looks as follows.  The first two lines are always this:

        #include "pgenheaders.h"
        #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
          static arc arcs_<n>_<m>[<k>] = {
            {<i>, <j>},
            ...
          };
        - followed by a state array, of the form:
          static state states_<s>[<t>] = {
            {<k>, arcs_<n>_<m>},
            ...
          };

        Returns False if the file could not be opened; any deviation
        from the expected file format raises AssertionError.
        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        # The code below essentially uses f's iterator-ness!
        lineno = 0

        with f:
            # Expect the two #include lines
            lineno, line = lineno+1, next(f)
            assert line == '#include "pgenheaders.h"\n', (lineno, line)
            lineno, line = lineno+1, next(f)
            assert line == '#include "grammar.h"\n', (lineno, line)

            # Parse the state definitions
            lineno, line = lineno+1, next(f)
            allarcs = {}
            states = []
            while line.startswith("static arc "):
                while line.startswith("static arc "):
                    mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
                                  line)
                    assert mo, (lineno, line)
                    n, m, k = map(int, mo.groups())
                    arcs = []
                    for _ in range(k):
                        lineno, line = lineno+1, next(f)
                        mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                        assert mo, (lineno, line)
                        i, j = map(int, mo.groups())
                        arcs.append((i, j))
                    lineno, line = lineno+1, next(f)
                    assert line == "};\n", (lineno, line)
                    allarcs[(n, m)] = arcs
                    lineno, line = lineno+1, next(f)
                mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
                assert mo, (lineno, line)
                s, t = map(int, mo.groups())
                # States are emitted in order; index s is the next slot.
                assert s == len(states), (lineno, line)
                state = []
                for _ in range(t):
                    lineno, line = lineno+1, next(f)
                    mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                    assert mo, (lineno, line)
                    k, n, m = map(int, mo.groups())
                    arcs = allarcs[n, m]
                    assert k == len(arcs), (lineno, line)
                    state.append(arcs)
                states.append(state)
                lineno, line = lineno+1, next(f)
                assert line == "};\n", (lineno, line)
                lineno, line = lineno+1, next(f)
            self.states = states

            # Parse the dfas
            dfas = {}
            mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            for i in range(ndfas):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                              line)
                assert mo, (lineno, line)
                symbol = mo.group(2)
                number, x, y, z = map(int, mo.group(1, 3, 4, 5))
                # Cross-check against the tables built from graminit.h.
                assert self.symbol2number[symbol] == number, (lineno, line)
                assert self.number2symbol[number] == symbol, (lineno, line)
                assert x == 0, (lineno, line)
                state = states[z]
                assert y == len(state), (lineno, line)
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
                assert mo, (lineno, line)
                first = {}
                # The first-set bitmap is a C string literal made only of
                # octal escapes (regex above guarantees this); eval() is
                # acceptable solely because graminit.c is trusted,
                # generated output -- never feed this untrusted data.
                rawbitset = eval(mo.group(1))
                for i, c in enumerate(rawbitset):
                    byte = ord(c)
                    for j in range(8):
                        if byte & (1<<j):
                            first[i*8 + j] = 1
                dfas[number] = (state, first)
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.dfas = dfas

            # Parse the labels
            labels = []
            lineno, line = lineno+1, next(f)
            mo = re.match(r"static label labels\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            for i in range(nlabels):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
                assert mo, (lineno, line)
                x, y = mo.groups()
                x = int(x)
                if y == "0":
                    y = None
                else:
                    y = eval(y)
                labels.append((x, y))
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.labels = labels

            # Parse the grammar struct
            lineno, line = lineno+1, next(f)
            assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+),$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            assert ndfas == len(self.dfas)
            lineno, line = lineno+1, next(f)
            assert line == "\tdfas,\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+{(\d+), labels},$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            assert nlabels == len(self.labels), (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+)$", line)
            assert mo, (lineno, line)
            start = int(mo.group(1))
            assert start in self.number2symbol, (lineno, line)
            self.start = start
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            # Nothing may follow the grammar struct.
            try:
                lineno, line = lineno+1, next(f)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)

    def finish_off(self):
        """Create additional useful structures.  (Internal)."""
        self.keywords = {}  # map from keyword strings to arc labels
        self.tokens = {}    # map from numeric token values to arc labels
        for ilabel, (type, value) in enumerate(self.labels):
            if type == token.NAME and value is not None:
                # A NAME label with a value is a keyword.
                self.keywords[value] = ilabel
            elif value is None:
                # A valueless label stands for a plain token type.
                self.tokens[type] = ilabel