]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Lib/lib2to3/pgen2/conv.py
a179ff00b03aeaa6b03d9c2e4d5d9498e36bd82e
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / lib2to3 / pgen2 / conv.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 """Convert graminit.[ch] spit out by pgen to Python code.
5
6 Pgen is the Python parser generator. It is useful to quickly create a
7 parser from a grammar file in Python's grammar notation. But I don't
8 want my parsers to be written in C (yet), so I'm translating the
9 parsing tables to Python data structures and writing a Python parse
10 engine.
11
12 Note that the token numbers are constants determined by the standard
13 Python tokenizer. The standard token module defines these numbers and
14 their names (the names are not used much). The token numbers are
15 hardcoded into the Python tokenizer and into pgen. A Python
16 implementation of the Python tokenizer is also available, in the
17 standard tokenize module.
18
19 On the other hand, symbol numbers (representing the grammar's
20 non-terminals) are assigned by pgen based on the actual grammar
21 input.
22
23 Note: this module is pretty much obsolete; the pgen module generates
24 equivalent grammar tables directly from the Grammar.txt input file
25 without having to invoke the Python pgen C program.
26
27 """
28
29 # Python imports
30 import re
31
32 # Local imports
33 from pgen2 import grammar, token
34
35
class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c.  The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h -- path to the .h file defining the nonterminal numbers
        graminit_c -- path to the .c file containing the parse tables
        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen.  (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers.  We build two tables
        mapping the numbers to names and back.

        Returns True on success; prints a message and returns False if
        the file cannot be opened.
        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        lineno = 0
        with f:
            for line in f:
                lineno += 1
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if mo:
                    symbol, number = mo.groups()
                    number = int(number)
                    # Each name and each number must be unique.
                    assert symbol not in self.symbol2number
                    assert number not in self.number2symbol
                    self.symbol2number[symbol] = number
                    self.number2symbol[number] = symbol
                elif line.strip():
                    # Complain only about non-blank unparseable lines.
                    # (Previously a blank line fell through to the success
                    # branch and crashed calling mo.groups() on None.)
                    print("%s(%s): can't parse %s" % (filename, lineno,
                                                      line.strip()))
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen.  (Internal)

        The file looks as follows.  The first two lines are always this:

        #include "pgenheaders.h"
        #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
          static arc arcs_<n>_<m>[<k>] = {
                  {<i>, <j>},
                  ...
          };
        - followed by a state array, of the form:
          static state states_<s>[<t>] = {
                  {<k>, arcs_<n>_<m>},
                  ...
          };

        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        with f:
            # The code below essentially uses f's iterator-ness!
            lineno = 0

            # Expect the two #include lines
            lineno, line = lineno+1, next(f)
            assert line == '#include "pgenheaders.h"\n', (lineno, line)
            lineno, line = lineno+1, next(f)
            assert line == '#include "grammar.h"\n', (lineno, line)

            # Parse the state definitions
            lineno, line = lineno+1, next(f)
            allarcs = {}
            states = []
            while line.startswith("static arc "):
                # One or more arc arrays, keyed by (n, m) ...
                while line.startswith("static arc "):
                    mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
                                  line)
                    assert mo, (lineno, line)
                    n, m, k = map(int, mo.groups())
                    arcs = []
                    for _ in range(k):
                        lineno, line = lineno+1, next(f)
                        mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                        assert mo, (lineno, line)
                        i, j = map(int, mo.groups())
                        arcs.append((i, j))
                    lineno, line = lineno+1, next(f)
                    assert line == "};\n", (lineno, line)
                    allarcs[(n, m)] = arcs
                    lineno, line = lineno+1, next(f)
                # ... followed by the state array referencing them.
                mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
                assert mo, (lineno, line)
                s, t = map(int, mo.groups())
                # States must appear in order.
                assert s == len(states), (lineno, line)
                state = []
                for _ in range(t):
                    lineno, line = lineno+1, next(f)
                    mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                    assert mo, (lineno, line)
                    k, n, m = map(int, mo.groups())
                    arcs = allarcs[n, m]
                    assert k == len(arcs), (lineno, line)
                    state.append(arcs)
                states.append(state)
                lineno, line = lineno+1, next(f)
                assert line == "};\n", (lineno, line)
                lineno, line = lineno+1, next(f)
            self.states = states

            # Parse the dfas
            dfas = {}
            mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            for _ in range(ndfas):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                              line)
                assert mo, (lineno, line)
                symbol = mo.group(2)
                number, x, y, z = map(int, mo.group(1, 3, 4, 5))
                # Cross-check against the tables built from graminit.h.
                assert self.symbol2number[symbol] == number, (lineno, line)
                assert self.number2symbol[number] == symbol, (lineno, line)
                assert x == 0, (lineno, line)
                state = states[z]
                assert y == len(state), (lineno, line)
                lineno, line = lineno+1, next(f)
                # The first-set bitmap, written as an escaped C string.
                mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
                assert mo, (lineno, line)
                first = {}
                rawbitset = eval(mo.group(1))
                # Expand the bitmap: bit j of byte b means label b*8+j is
                # in the first set.  (Don't reuse the dfa loop index here.)
                for byteno, c in enumerate(rawbitset):
                    byte = ord(c)
                    for bit in range(8):
                        if byte & (1 << bit):
                            first[byteno*8 + bit] = 1
                dfas[number] = (state, first)
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.dfas = dfas

            # Parse the labels
            labels = []
            lineno, line = lineno+1, next(f)
            mo = re.match(r"static label labels\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            for _ in range(nlabels):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
                assert mo, (lineno, line)
                x, y = mo.groups()
                x = int(x)
                if y == "0":
                    y = None
                else:
                    y = eval(y)
                labels.append((x, y))
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.labels = labels

            # Parse the grammar struct
            lineno, line = lineno+1, next(f)
            assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+),$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            assert ndfas == len(self.dfas)
            lineno, line = lineno+1, next(f)
            assert line == "\tdfas,\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+{(\d+), labels},$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            assert nlabels == len(self.labels), (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+)$", line)
            assert mo, (lineno, line)
            start = int(mo.group(1))
            assert start in self.number2symbol, (lineno, line)
            self.start = start
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            # Nothing may follow the grammar struct.
            try:
                lineno, line = lineno+1, next(f)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)

    def finish_off(self):
        """Create additional useful structures.  (Internal).

        Derives from self.labels:
        - self.keywords: keyword string -> arc label index
        - self.tokens:   numeric token value -> arc label index
        """
        self.keywords = {}  # map from keyword strings to arc labels
        self.tokens = {}    # map from numeric token values to arc labels
        for ilabel, (toktype, value) in enumerate(self.labels):
            if toktype == token.NAME and value is not None:
                # A NAME label with a value is a keyword.
                self.keywords[value] = ilabel
            elif value is None:
                # A valueless label stands for the token type itself.
                self.tokens[toktype] = ilabel