]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Lib/lib2to3/pgen2/conv.py
a179ff00b03aeaa6b03d9c2e4d5d9498e36bd82e
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / lib2to3 / pgen2 / conv.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 """Convert graminit.[ch] spit out by pgen to Python code.
5
6 Pgen is the Python parser generator. It is useful to quickly create a
7 parser from a grammar file in Python's grammar notation. But I don't
8 want my parsers to be written in C (yet), so I'm translating the
9 parsing tables to Python data structures and writing a Python parse
10 engine.
11
12 Note that the token numbers are constants determined by the standard
13 Python tokenizer. The standard token module defines these numbers and
14 their names (the names are not used much). The token numbers are
15 hardcoded into the Python tokenizer and into pgen. A Python
16 implementation of the Python tokenizer is also available, in the
17 standard tokenize module.
18
19 On the other hand, symbol numbers (representing the grammar's
20 non-terminals) are assigned by pgen based on the actual grammar
21 input.
22
23 Note: this module is pretty much obsolete; the pgen module generates
24 equivalent grammar tables directly from the Grammar.txt input file
25 without having to invoke the Python pgen C program.
26
27 """
28
29 # Python imports
30 import re
31
32 # Local imports
33 from pgen2 import grammar, token
34
35
class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c.  The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h -- path to the .h file defining the nonterminal numbers
        graminit_c -- path to the .c file containing the parse tables
        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen.  (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers.  We build two tables
        mapping the numbers to names and back.

        Returns True on success; prints a message and returns False if
        the file cannot be opened.
        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        lineno = 0
        with f:
            for line in f:
                lineno += 1
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if mo:
                    symbol, number = mo.groups()
                    number = int(number)
                    # Each name and each number must be unique.
                    assert symbol not in self.symbol2number
                    assert number not in self.number2symbol
                    self.symbol2number[symbol] = number
                    self.number2symbol[number] = symbol
                elif line.strip():
                    # Complain only about non-blank unparseable lines.
                    # (Previously a blank line fell through to the success
                    # branch and crashed calling mo.groups() on None.)
                    print("%s(%s): can't parse %s" % (filename, lineno,
                                                      line.strip()))
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen.  (Internal)

        The file looks as follows.  The first two lines are always this:

        #include "pgenheaders.h"
        #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
          static arc arcs_<n>_<m>[<k>] = {
                  {<i>, <j>},
                  ...
          };
        - followed by a state array, of the form:
          static state states_<s>[<t>] = {
                  {<k>, arcs_<n>_<m>},
                  ...
          };

        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        with f:
            # The code below essentially uses f's iterator-ness!
            lineno = 0

            # Expect the two #include lines
            lineno, line = lineno+1, next(f)
            assert line == '#include "pgenheaders.h"\n', (lineno, line)
            lineno, line = lineno+1, next(f)
            assert line == '#include "grammar.h"\n', (lineno, line)

            # Parse the state definitions
            lineno, line = lineno+1, next(f)
            allarcs = {}
            states = []
            while line.startswith("static arc "):
                # One or more arc arrays, keyed by (n, m) ...
                while line.startswith("static arc "):
                    mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
                                  line)
                    assert mo, (lineno, line)
                    n, m, k = map(int, mo.groups())
                    arcs = []
                    for _ in range(k):
                        lineno, line = lineno+1, next(f)
                        mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                        assert mo, (lineno, line)
                        i, j = map(int, mo.groups())
                        arcs.append((i, j))
                    lineno, line = lineno+1, next(f)
                    assert line == "};\n", (lineno, line)
                    allarcs[(n, m)] = arcs
                    lineno, line = lineno+1, next(f)
                # ... followed by the state array referencing them.
                mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
                assert mo, (lineno, line)
                s, t = map(int, mo.groups())
                # States must appear in order.
                assert s == len(states), (lineno, line)
                state = []
                for _ in range(t):
                    lineno, line = lineno+1, next(f)
                    mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                    assert mo, (lineno, line)
                    k, n, m = map(int, mo.groups())
                    arcs = allarcs[n, m]
                    assert k == len(arcs), (lineno, line)
                    state.append(arcs)
                states.append(state)
                lineno, line = lineno+1, next(f)
                assert line == "};\n", (lineno, line)
                lineno, line = lineno+1, next(f)
            self.states = states

            # Parse the dfas
            dfas = {}
            mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            for _ in range(ndfas):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                              line)
                assert mo, (lineno, line)
                symbol = mo.group(2)
                number, x, y, z = map(int, mo.group(1, 3, 4, 5))
                # Cross-check against the tables built from graminit.h.
                assert self.symbol2number[symbol] == number, (lineno, line)
                assert self.number2symbol[number] == symbol, (lineno, line)
                assert x == 0, (lineno, line)
                state = states[z]
                assert y == len(state), (lineno, line)
                lineno, line = lineno+1, next(f)
                # The first-set bitmap, written as an escaped C string.
                mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
                assert mo, (lineno, line)
                first = {}
                rawbitset = eval(mo.group(1))
                # Expand the bitmap: bit j of byte b means label b*8+j is
                # in the first set.  (Don't reuse the dfa loop index here.)
                for byteno, c in enumerate(rawbitset):
                    byte = ord(c)
                    for bit in range(8):
                        if byte & (1 << bit):
                            first[byteno*8 + bit] = 1
                dfas[number] = (state, first)
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.dfas = dfas

            # Parse the labels
            labels = []
            lineno, line = lineno+1, next(f)
            mo = re.match(r"static label labels\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            for _ in range(nlabels):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
                assert mo, (lineno, line)
                x, y = mo.groups()
                x = int(x)
                if y == "0":
                    y = None
                else:
                    y = eval(y)
                labels.append((x, y))
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.labels = labels

            # Parse the grammar struct
            lineno, line = lineno+1, next(f)
            assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+),$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            assert ndfas == len(self.dfas)
            lineno, line = lineno+1, next(f)
            assert line == "\tdfas,\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+{(\d+), labels},$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            assert nlabels == len(self.labels), (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+)$", line)
            assert mo, (lineno, line)
            start = int(mo.group(1))
            assert start in self.number2symbol, (lineno, line)
            self.start = start
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            # Nothing may follow the grammar struct.
            try:
                lineno, line = lineno+1, next(f)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)

    def finish_off(self):
        """Create additional useful structures.  (Internal).

        Derives from self.labels:
        - self.keywords: keyword string -> arc label index
        - self.tokens:   numeric token value -> arc label index
        """
        self.keywords = {}  # map from keyword strings to arc labels
        self.tokens = {}    # map from numeric token values to arc labels
        for ilabel, (toktype, value) in enumerate(self.labels):
            if toktype == token.NAME and value is not None:
                # A NAME label with a value is a keyword.
                self.keywords[value] = ilabel
            elif value is None:
                # A valueless label stands for the token type itself.
                self.tokens[toktype] = ilabel