]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.\r |
2 | # Licensed to PSF under a Contributor Agreement.\r | |
3 | \r | |
4 | """This module defines the data structures used to represent a grammar.\r | |
5 | \r | |
6 | These are a bit arcane because they are derived from the data\r | |
7 | structures used by Python's 'pgen' parser generator.\r | |
8 | \r | |
9 | There's also a table here mapping operators to their names in the\r | |
10 | token module; the Python tokenize module reports all operators as the\r | |
11 | fallback token code OP, but the parser needs the actual token code.\r | |
12 | \r | |
13 | """\r | |
14 | \r | |
15 | # Python imports\r | |
16 | import pickle\r | |
17 | \r | |
18 | # Local imports\r | |
19 | from . import token, tokenize\r | |
20 | \r | |
21 | \r | |
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict mapping symbol names to labels
                     (presumably arc labels; populated by the
                     table-building subclasses, not here).

    """

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved for the EMPTY sentinel.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the lowest possible symbol number (0-255 are tokens).
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        Uses a with-block so the file is closed even if pickling
        raises (the previous version leaked the handle on error).
        """
        with open(filename, "wb") as f:
            # Protocol 2 is the binary protocol; kept for
            # compatibility with existing pickle files.
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The unpickled dict replaces/updates this instance's
        attributes wholesale.
        """
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """Return a shallow copy of the grammar.

        The table containers themselves are copied, so the new
        grammar's tables can be mutated independently; the items they
        contain are shared with the original.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        # print() calls instead of Python-2 print statements, so this
        # module is importable under Python 3.
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
127 | \r | |
128 | \r | |
# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

# Build the operator-string -> token-number table: every non-blank
# line of opmap_raw is "<operator> <TOKEN_NAME>", and the token module
# supplies the numeric code for each name.
opmap = {
    op_text: getattr(token, tok_name)
    for op_text, tok_name in (
        entry.split() for entry in opmap_raw.splitlines() if entry
    )
}