# Copyright 2004-2005 Elemental Security, Inc.  All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Pgen imports
from . import grammar, token, tokenize

class PgenGrammar(grammar.Grammar):
    # Concrete Grammar subclass filled in by ParserGenerator.make_grammar().
    pass
class ParserGenerator(object):
    """Parse a grammar description file and build parser tables.

    The constructor tokenizes the input, parses each rule into an NFA,
    converts the NFAs to simplified DFAs, and computes FIRST sets;
    make_grammar() then emits the table-driven PgenGrammar object.
    """

    def __init__(self, filename, stream=None):
        # Only close the stream if we opened it ourselves; a
        # caller-supplied stream is left open.
        close_stream = None
        if stream is None:
            stream = open(filename)
            close_stream = stream.close
        self.filename = filename
        self.stream = stream
        self.generator = tokenize.generate_tokens(stream.readline)
        self.gettoken() # Initialize lookahead
        self.dfas, self.startsymbol = self.parse()
        if close_stream is not None:
            close_stream()
        self.first = {} # map from symbol name to set of tokens
        self.addfirstsets()
27 def make_grammar(self
):
29 names
= self
.dfas
.keys()
31 names
.remove(self
.startsymbol
)
32 names
.insert(0, self
.startsymbol
)
34 i
= 256 + len(c
.symbol2number
)
35 c
.symbol2number
[name
] = i
36 c
.number2symbol
[i
] = name
42 for label
, next
in state
.arcs
.iteritems():
43 arcs
.append((self
.make_label(c
, label
), dfa
.index(next
)))
45 arcs
.append((0, dfa
.index(state
)))
47 c
.states
.append(states
)
48 c
.dfas
[c
.symbol2number
[name
]] = (states
, self
.make_first(c
, name
))
49 c
.start
= c
.symbol2number
[self
.startsymbol
]
52 def make_first(self
, c
, name
):
53 rawfirst
= self
.first
[name
]
55 for label
in rawfirst
:
56 ilabel
= self
.make_label(c
, label
)
57 ##assert ilabel not in first # XXX failed on <> ... !=
61 def make_label(self
, c
, label
):
62 # XXX Maybe this should be a method on a subclass of converter?
63 ilabel
= len(c
.labels
)
64 if label
[0].isalpha():
65 # Either a symbol name or a named token
66 if label
in c
.symbol2number
:
67 # A symbol name (a non-terminal)
68 if label
in c
.symbol2label
:
69 return c
.symbol2label
[label
]
71 c
.labels
.append((c
.symbol2number
[label
], None))
72 c
.symbol2label
[label
] = ilabel
75 # A named token (NAME, NUMBER, STRING)
76 itoken
= getattr(token
, label
, None)
77 assert isinstance(itoken
, int), label
78 assert itoken
in token
.tok_name
, label
79 if itoken
in c
.tokens
:
80 return c
.tokens
[itoken
]
82 c
.labels
.append((itoken
, None))
83 c
.tokens
[itoken
] = ilabel
86 # Either a keyword or an operator
87 assert label
[0] in ('"', "'"), label
89 if value
[0].isalpha():
91 if value
in c
.keywords
:
92 return c
.keywords
[value
]
94 c
.labels
.append((token
.NAME
, value
))
95 c
.keywords
[value
] = ilabel
98 # An operator (any non-numeric token)
99 itoken
= grammar
.opmap
[value
] # Fails if unknown token
100 if itoken
in c
.tokens
:
101 return c
.tokens
[itoken
]
103 c
.labels
.append((itoken
, None))
104 c
.tokens
[itoken
] = ilabel
107 def addfirstsets(self
):
108 names
= self
.dfas
.keys()
111 if name
not in self
.first
:
113 #print name, self.first[name].keys()
115 def calcfirst(self
, name
):
116 dfa
= self
.dfas
[name
]
117 self
.first
[name
] = None # dummy to detect left recursion
121 for label
, next
in state
.arcs
.iteritems():
122 if label
in self
.dfas
:
123 if label
in self
.first
:
124 fset
= self
.first
[label
]
126 raise ValueError("recursion for rule %r" % name
)
128 self
.calcfirst(label
)
129 fset
= self
.first
[label
]
130 totalset
.update(fset
)
131 overlapcheck
[label
] = fset
134 overlapcheck
[label
] = {label
: 1}
136 for label
, itsfirst
in overlapcheck
.iteritems():
137 for symbol
in itsfirst
:
138 if symbol
in inverse
:
139 raise ValueError("rule %s is ambiguous; %s is in the"
140 " first sets of %s as well as %s" %
141 (name
, symbol
, label
, inverse
[symbol
]))
142 inverse
[symbol
] = label
143 self
.first
[name
] = totalset
148 # MSTART: (NEWLINE | RULE)* ENDMARKER
149 while self
.type != token
.ENDMARKER
:
150 while self
.type == token
.NEWLINE
:
152 # RULE: NAME ':' RHS NEWLINE
153 name
= self
.expect(token
.NAME
)
154 self
.expect(token
.OP
, ":")
155 a
, z
= self
.parse_rhs()
156 self
.expect(token
.NEWLINE
)
157 #self.dump_nfa(name, a, z)
158 dfa
= self
.make_dfa(a
, z
)
159 #self.dump_dfa(name, dfa)
161 self
.simplify_dfa(dfa
)
164 #print name, oldlen, newlen
165 if startsymbol
is None:
167 return dfas
, startsymbol
169 def make_dfa(self
, start
, finish
):
170 # To turn an NFA into a DFA, we define the states of the DFA
171 # to correspond to *sets* of states of the NFA. Then do some
172 # state reduction. Let's represent sets as dicts with 1 for
174 assert isinstance(start
, NFAState
)
175 assert isinstance(finish
, NFAState
)
178 addclosure(state
, base
)
180 def addclosure(state
, base
):
181 assert isinstance(state
, NFAState
)
185 for label
, next
in state
.arcs
:
187 addclosure(next
, base
)
188 states
= [DFAState(closure(start
), finish
)]
189 for state
in states
: # NB states grows while we're iterating
191 for nfastate
in state
.nfaset
:
192 for label
, next
in nfastate
.arcs
:
193 if label
is not None:
194 addclosure(next
, arcs
.setdefault(label
, {}))
195 for label
, nfaset
in arcs
.iteritems():
197 if st
.nfaset
== nfaset
:
200 st
= DFAState(nfaset
, finish
)
202 state
.addarc(st
, label
)
203 return states
# List of DFAState instances; first one is start
205 def dump_nfa(self
, name
, start
, finish
):
206 print "Dump of NFA for", name
208 for i
, state
in enumerate(todo
):
209 print " State", i
, state
is finish
and "(final)" or ""
210 for label
, next
in state
.arcs
:
219 print " %s -> %d" % (label
, j
)
221 def dump_dfa(self
, name
, dfa
):
222 print "Dump of DFA for", name
223 for i
, state
in enumerate(dfa
):
224 print " State", i
, state
.isfinal
and "(final)" or ""
225 for label
, next
in state
.arcs
.iteritems():
226 print " %s -> %d" % (label
, dfa
.index(next
))
228 def simplify_dfa(self
, dfa
):
229 # This is not theoretically optimal, but works well enough.
230 # Algorithm: repeatedly look for two states that have the same
231 # set of arcs (same labels pointing to the same nodes) and
232 # unify them, until things stop changing.
234 # dfa is a list of DFAState instances
238 for i
, state_i
in enumerate(dfa
):
239 for j
in range(i
+1, len(dfa
)):
241 if state_i
== state_j
:
242 #print " unify", i, j
245 state
.unifystate(state_j
, state_i
)
250 # RHS: ALT ('|' ALT)*
251 a
, z
= self
.parse_alt()
252 if self
.value
!= "|":
259 while self
.value
== "|":
261 a
, z
= self
.parse_alt()
268 a
, b
= self
.parse_item()
269 while (self
.value
in ("(", "[") or
270 self
.type in (token
.NAME
, token
.STRING
)):
271 c
, d
= self
.parse_item()
276 def parse_item(self
):
277 # ITEM: '[' RHS ']' | ATOM ['+' | '*']
278 if self
.value
== "[":
280 a
, z
= self
.parse_rhs()
281 self
.expect(token
.OP
, "]")
285 a
, z
= self
.parse_atom()
287 if value
not in ("+", "*"):
296 def parse_atom(self
):
297 # ATOM: '(' RHS ')' | NAME | STRING
298 if self
.value
== "(":
300 a
, z
= self
.parse_rhs()
301 self
.expect(token
.OP
, ")")
303 elif self
.type in (token
.NAME
, token
.STRING
):
306 a
.addarc(z
, self
.value
)
310 self
.raise_error("expected (...) or NAME or STRING, got %s/%s",
311 self
.type, self
.value
)
313 def expect(self
, type, value
=None):
314 if self
.type != type or (value
is not None and self
.value
!= value
):
315 self
.raise_error("expected %s/%s, got %s/%s",
316 type, value
, self
.type, self
.value
)
322 tup
= self
.generator
.next()
323 while tup
[0] in (tokenize
.COMMENT
, tokenize
.NL
):
324 tup
= self
.generator
.next()
325 self
.type, self
.value
, self
.begin
, self
.end
, self
.line
= tup
326 #print token.tok_name[self.type], repr(self.value)
328 def raise_error(self
, msg
, *args
):
333 msg
= " ".join([msg
] + map(str, args
))
334 raise SyntaxError(msg
, (self
.filename
, self
.end
[0],
335 self
.end
[1], self
.line
))
class NFAState(object):
    """A state of the nondeterministic automaton built from the grammar.

    Arcs labeled None are epsilon transitions.
    """

    def __init__(self):
        self.arcs = [] # list of (label, NFAState) pairs

    def addarc(self, next, label=None):
        """Append an arc to *next*; label is None (epsilon) or a string."""
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))
class DFAState(object):
    """A deterministic state: a set of NFA states plus labeled arcs.

    nfaset is a dict used as a set of NFAState; isfinal records whether
    the NFA's finish state is in the set.
    """

    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(iter(nfaset).next(), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {} # map from label to DFAState

    def addarc(self, next, label):
        """Add a deterministic arc; each label may appear only once."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old, new):
        """Redirect every arc pointing at *old* to point at *new*."""
        for label, next in self.arcs.iteritems():
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in self.arcs.iteritems():
            if next is not other.arcs.get(label):
                return False
        return True

    __hash__ = None # For Py3 compatibility.
def generate_grammar(filename="Grammar.txt"):
    """Parse the grammar file *filename* and return the resulting PgenGrammar."""
    p = ParserGenerator(filename)
    return p.make_grammar()