"""Tokenization help for Python programs.

generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens. It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF). It generates
5-tuples with these members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.

Older entry points
    tokenize_loop(readline, tokeneater)
    tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""

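# Illustrative sketch (not part of the original module): one way to drive
# generate_tokens() is with the readline method of a StringIO object; the
# source string below is only an example.
#
#     from StringIO import StringIO
#     for tok_type, tok_string, start, end, logical_line in \
#             generate_tokens(StringIO("x = 1\n").readline):
#         print tok_name[tok_type], repr(tok_string), start, end
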
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
               'Skip Montanaro, Raymond Hettinger')

from itertools import chain
import string, re
from token import *

import token
__all__ = [x for x in dir(token) if not x.startswith("_")]
__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
del x
del token

COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
N_TOKENS += 2

def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'

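# Illustrative note (a sketch of the strings the helpers above build by
# plain concatenation of regular-expression fragments):
#
#     group('a', 'b')   ->  '(a|b)'
#     any('a', 'b')     ->  '(a|b)*'
#     maybe('a', 'b')   ->  '(a|b)?'
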
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
Binnumber = r'0[bB][01]+[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")

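# Illustrative check (sketch): because the longer alternatives are listed
# first, an augmented operator is matched as a single token rather than
# being split:
#
#     re.match(Operator, '**=').group(0)  ->  '**='
#     re.match(Operator, '<<').group(0)   ->  '<<'
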
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

tokenprog, pseudoprog, single3prog, double3prog = map(
    re.compile, (Token, PseudoToken, Single3, Double3))
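
# Illustrative sketch: pseudoprog skips leading whitespace and captures one
# token in group 1; the sample line below is only an example.
#
#     m = pseudoprog.match("    spam = 1\n", 0)
#     m.span(1)   ->  (4, 8)       # the NAME token 'spam'
#     m.group(1)  ->  'spam'
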
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
            "'''": single3prog, '"""': double3prog,
            "r'''": single3prog, 'r"""': double3prog,
            "u'''": single3prog, 'u"""': double3prog,
            "ur'''": single3prog, 'ur"""': double3prog,
            "R'''": single3prog, 'R"""': double3prog,
            "U'''": single3prog, 'U"""': double3prog,
            "uR'''": single3prog, 'uR"""': double3prog,
            "Ur'''": single3prog, 'Ur"""': double3prog,
            "UR'''": single3prog, 'UR"""': double3prog,
            "b'''": single3prog, 'b"""': double3prog,
            "br'''": single3prog, 'br"""': double3prog,
            "B'''": single3prog, 'B"""': double3prog,
            "bR'''": single3prog, 'bR"""': double3prog,
            "Br'''": single3prog, 'Br"""': double3prog,
            "BR'''": single3prog, 'BR"""': double3prog,
            'r': None, 'R': None, 'u': None, 'U': None,
            'b': None, 'B': None}

triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "u'''", 'u"""', "U'''", 'U"""',
          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
          "uR'''", 'uR"""', "UR'''", 'UR"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""'):
    triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "ur'", 'ur"', "Ur'", 'Ur"',
          "uR'", 'uR"', "UR'", 'UR"',
          "b'", 'b"', "B'", 'B"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"' ):
    single_quoted[t] = t

tabsize = 8

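# Illustrative note (sketch): generate_tokens() measures indentation columns
# with tabs expanded to the next multiple of tabsize, via
# column = (column//tabsize + 1)*tabsize; e.g. a tab seen at column 3
# advances the column to 8.
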
class TokenError(Exception): pass

class StopTokenizing(Exception): pass

def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
    srow, scol = srow_scol
    erow, ecol = erow_ecol
    print "%d,%d-%d,%d:\t%s\t%s" % \
        (srow, scol, erow, ecol, tok_name[type], repr(token))

def tokenize(readline, tokeneater=printtoken):
    """
    The tokenize() function accepts two parameters: one representing the
    input stream, and one providing an output mechanism for tokenize().

    The first parameter, readline, must be a callable object which provides
    the same interface as the readline() method of built-in file objects.
    Each call to the function should return one line of input as a string.

    The second parameter, tokeneater, must also be a callable object. It is
    called once for each token, with five arguments, corresponding to the
    tuples generated by generate_tokens().
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        pass

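# Illustrative sketch (not part of the original module): tokenize() feeds
# each token to a callback instead of generating it; StringIO and collect()
# are only example names.
#
#     from StringIO import StringIO
#     seen = []
#     def collect(type, token, start, end, line):
#         seen.append((tok_name[type], token))
#     tokenize(StringIO("x = 1\n").readline, collect)
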
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    for token_info in generate_tokens(readline):
        tokeneater(*token_info)

class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0

    def add_whitespace(self, start):
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError("start ({},{}) precedes previous end ({},{})"
                             .format(row, col, self.prev_row, self.prev_col))
        row_offset = row - self.prev_row
        if row_offset:
            self.tokens.append("\\\n" * row_offset)
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        it = iter(iterable)
        for t in it:
            if len(t) == 2:
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENDMARKER:
                break
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        indents = []
        toks_append = self.tokens.append
        startline = token[0] in (NEWLINE, NL)
        prevstring = False

        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)

def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value. If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    return ut.untokenize(iterable)

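# Illustrative sketch (not part of the original module): rebuilding source
# from full 5-tuples preserves the original spacing; the sample string is
# only an example.
#
#     from StringIO import StringIO
#     toks = list(generate_tokens(StringIO("x = 1\n").readline))
#     untokenize(toks)  ->  'x = 1\n'
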
def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string. Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.
    """
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError, ("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError, ("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                if start == end:
                    continue
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    yield (NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1:
        tokenize(open(sys.argv[1]).readline)
    else:
        tokenize(sys.stdin.readline)