]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Lib/tokenize.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / tokenize.py
CommitLineData
"""Tokenization help for Python programs.

generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens.  It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF).  It generates
5-tuples with these members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.

Older entry points
    tokenize_loop(readline, tokeneater)
    tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
24\r
25__author__ = 'Ka-Ping Yee <ping@lfw.org>'\r
26__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '\r
27 'Skip Montanaro, Raymond Hettinger')\r
28\r
29import string, re\r
30from token import *\r
31\r
import token
# Public API: every public name of the token module, plus the names this
# module adds on top of it.
__all__ = [x for x in dir(token) if not x.startswith("_")]
__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
# In Python 2 the list-comprehension variable leaks into module scope;
# remove it, and the temporary module reference, from the namespace.
del x
del token

# Two token types that exist only in this module: COMMENT for comments and
# NL for newlines that do not terminate a logical line.  They take the next
# two free token numbers and are registered in tok_name so they print by name.
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
N_TOKENS += 2
43\r
def group(*choices):
    """Return a capturing regex group matching any one of the given patterns."""
    return '(' + '|'.join(choices) + ')'

def any(*choices):
    """Return a regex matching zero or more repetitions of group(*choices).

    Note: this intentionally keeps its historical name even though it
    shadows the builtin any() inside this module.
    """
    return group(*choices) + '*'

def maybe(*choices):
    """Return a regex matching group(*choices) zero or one time."""
    return group(*choices) + '?'

Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
# The optional long suffix has to apply to both octal spellings (0o17L and
# 017L); an un-grouped alternation would attach it to the second one only.
Octnumber = r'0(?:[oO][0-7]+|[0-7]*)[lL]?'
Binnumber = r'0[bB][01]+[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
# String prefixes: an optional u/U or b/B followed by an optional r/R.  The
# b/B forms must be accepted here because the endprogs, single_quoted and
# triple_quoted tables are keyed on them; without them b'...' is split into
# a NAME token followed by a STRING token.
Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
# Group 1 of PseudoToken is the token text without its leading whitespace.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
100\r
# Compiled forms of the main patterns.
tokenprog = re.compile(Token)
pseudoprog = re.compile(PseudoToken)
single3prog = re.compile(Single3)
double3prog = re.compile(Double3)

# endprogs maps a string opener to the compiled pattern that matches the
# remainder of that string.  Plain quotes get their own programs, every
# prefixed triple-quote opener shares the matching triple-quote program, and
# bare prefix letters map to None so lookups fall through to the quote char.
endprogs = {"'": re.compile(Single), '"': re.compile(Double)}
endprogs.update(
    (prefix + quote, prog)
    for prefix in ('', 'r', 'u', 'ur', 'R', 'U', 'uR', 'Ur', 'UR',
                   'b', 'br', 'B', 'bR', 'Br', 'BR')
    for quote, prog in (("'''", single3prog), ('"""', double3prog)))
endprogs.update(dict.fromkeys('rRuUbB'))
121\r
# Lookup tables of every recognised string opener (prefix + quote), each
# mapping the opener to itself.  The triple-quoted opener is derived from the
# single-quoted one by repeating its quote character two more times.
triple_quoted = {}
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "ur'", 'ur"', "Ur'", 'Ur"',
          "uR'", 'uR"', "UR'", 'UR"',
          "b'", 'b"', "B'", 'B"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"'):
    single_quoted[t] = t
    triple_quoted[t + 2 * t[-1]] = t + 2 * t[-1]
142\r
# Distance between tab stops used by generate_tokens() when it converts
# leading tabs into an indentation column.
tabsize = 8
144\r
class TokenError(Exception):
    """Raised by generate_tokens() when input ends inside a multi-line
    string or a multi-line statement."""
146\r
class StopTokenizing(Exception):
    """Exception that tokenize() catches and discards, ending the
    tokenizing run without an error."""
148\r
def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
    """Print one token as 'srow,scol-erow,ecol:<TAB>NAME<TAB>repr(token)'.

    srow_scol and erow_ecol are the (row, column) start and end pairs;
    line is accepted for signature compatibility and not printed.
    """
    (start_row, start_col), (end_row, end_col) = srow_scol, erow_ecol
    fields = (start_row, start_col, end_row, end_col,
              tok_name[type], repr(token))
    print("%d,%d-%d,%d:\t%s\t%s" % fields)
154\r
def tokenize(readline, tokeneater=printtoken):
    """
    Tokenize the input read through readline and hand each token to
    tokeneater.

    readline must be a callable with the same interface as the readline()
    method of built-in file objects: each call returns one line of input as
    a string.

    tokeneater must be a callable; it is invoked once per token with five
    arguments, matching the 5-tuples produced by generate_tokens().  A
    StopTokenizing exception raised while tokenizing ends the run quietly.
    """
    try:
        for token_info in generate_tokens(readline):
            tokeneater(*token_info)
    except StopTokenizing:
        return
172\r
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    """Call tokeneater with the five fields of every token that
    generate_tokens(readline) produces."""
    for tok_type, tok_string, start, end, line in generate_tokens(readline):
        tokeneater(tok_type, tok_string, start, end, line)
177\r
class Untokenizer:
    """Rebuild source text from a stream of tokens.

    Full 5-tuples (type, string, start, end, line) are placed using their
    recorded positions.  As soon as a 2-tuple (type, string) is seen, the
    rest of the stream is handled by compat(), which can only approximate
    the original spacing.
    """

    def __init__(self):
        self.tokens = []        # output fragments, joined by untokenize()
        self.prev_row = 1       # position just past the last emitted token
        self.prev_col = 0

    def add_whitespace(self, start):
        """Emit the filler needed to move from the previous token's end to
        start, a (row, col) pair.

        A token that begins on a later row without an intervening
        NEWLINE/NL token can only come from a backslash continuation, so
        one backslash-newline is emitted per skipped row.

        Raises ValueError if start lies before the previous token's end.
        """
        row, col = start
        if row < self.prev_row or (row == self.prev_row and
                                   col < self.prev_col):
            raise ValueError("start (%d,%d) precedes previous end (%d,%d)"
                             % (row, col, self.prev_row, self.prev_col))
        row_offset = row - self.prev_row
        if row_offset:
            self.tokens.append("\\\n" * row_offset)
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        """Return the source text for the tokens in iterable."""
        # Use one shared iterator so that compat() continues exactly where
        # this loop stopped, whether iterable is a list or a generator.
        it = iter(iterable)
        for t in it:
            if len(t) == 2:
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENDMARKER:
                # ENDMARKER carries no text; its position must not add filler.
                break
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Append text for token and for every remaining token in iterable,
        using only the first two fields (type, string) of each.

        token is the first 2-tuple that was already pulled from the stream;
        it is emitted here before the remaining tokens.
        """
        startline = False
        indents = []
        toks_append = self.tokens.append
        prevstring = False
        for stream in ([token], iterable):
            for tok in stream:
                toknum, tokval = tok[:2]

                if toknum in (NAME, NUMBER):
                    tokval += ' '

                # Insert a space between two consecutive strings
                if toknum == STRING:
                    if prevstring:
                        tokval = ' ' + tokval
                    prevstring = True
                else:
                    prevstring = False

                if toknum == INDENT:
                    indents.append(tokval)
                    continue
                elif toknum == DEDENT:
                    indents.pop()
                    continue
                elif toknum in (NEWLINE, NL):
                    startline = True
                elif startline and indents:
                    # First token on a new line: re-emit current indentation.
                    toks_append(indents[-1])
                    startline = False
                toks_append(tokval)
242\r
def untokenize(iterable):
    """Turn a stream of tokens back into Python source code.

    Every item of iterable must be a token sequence holding at least a
    token number and a token value.  When only those two fields are
    supplied, the spacing of the output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    return Untokenizer().untokenize(iterable)
263\r
def generate_tokens(readline):
    """
    Generate the tokens of the source text delivered by readline.

    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string.  Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.

    Raises TokenError when the input ends inside a multi-line string or a
    multi-line statement, and IndentationError when a dedent does not match
    any enclosing indentation level.
    """
    # lnum: current line number; parenlev: bracket nesting depth;
    # continued: set when the previous line ended with a backslash.
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    # contstr: text collected so far for a string spanning several lines;
    # needcont: set when that string needs a backslash at each line end.
    contstr, needcont = '', 0
    # contline: the physical lines collected for the pending string.
    contline = None
    # Stack of active indentation columns; column 0 is always present.
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            # An exhausted iterator-style readline is treated like EOF.
            line = ''
        lnum += 1
        pos, max = 0, len(line)                # note: max shadows the builtin

        if contstr:                            # continued string
            if not line:
                raise TokenError, ("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                # The string closes on this line; scanning resumes after it.
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # A backslash-continued string whose next line neither closes
                # it nor ends with another backslash: report it as an error.
                yield (ERRORTOKEN, contstr + line,
                           strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                # Still inside the string: keep accumulating.
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    # Advance to the next tab stop.
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    # A form feed resets the column count.
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                # Only whitespace with no line terminator: stop reading.
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    # line[pos] is not '#' in this branch, so the index
                    # expression always selects NL.
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError, ("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                # Group 1 is the token itself, without leading whitespace.
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a newline does not end the statement.
                    yield (NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Prefix letters map to None in endprogs, so this
                        # falls through to the entry for the quote character.
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    # Everything else is an operator; track bracket depth.
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield (OP, token, spos, epos, line)
            else:
                # No pattern matched: emit the single character as an error.
                yield (ERRORTOKEN, line[pos],
                           (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
418\r
if __name__ == '__main__':                     # testing
    # Print the tokens of the file named on the command line, or of
    # standard input when no file name is given.
    import sys
    if len(sys.argv) > 1:
        # Close the source file deterministically once tokenizing finishes
        # or fails, instead of leaving it to garbage collection.
        with open(sys.argv[1]) as source:
            tokenize(source.readline)
    else:
        tokenize(sys.stdin.readline)