]>
Commit | Line | Data |
---|---|---|
e0edde6f | 1 | # Copyright (c) 2010, 2011, 2012 Nicira, Inc. |
99155935 BP |
2 | # |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at: | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | ||
622749d8 | 15 | from __future__ import absolute_import |
6c7050b5 | 16 | |
622749d8 TW |
17 | import functools |
18 | import json | |
99155935 | 19 | import re |
99155935 BP |
20 | import sys |
21 | ||
75ff7116 TW |
# Identifiers for the two parser implementations this module can use.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'
# Prefer the C extension when it can be imported; otherwise fall back to the
# pure-Python Parser class defined in this module.
try:
    import ovs._json
    PARSER = PARSER_C
except ImportError:
    PARSER = PARSER_PY

__pychecker__ = 'no-stringiter'

# Indentation width, in spaces per nesting level, used for pretty output.
SPACES_PER_LEVEL = 2
# json.dumps pre-bound with compact separators (no space after ',' or ':').
dumper = functools.partial(json.dumps, separators=(",", ":"))
cba64103 BP |
34 | |
35 | ||
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serialize 'obj' as JSON text and write it to 'stream'.

    When 'pretty' is true the output is indented SPACES_PER_LEVEL spaces per
    nesting level; otherwise no indentation is requested.  'sort_keys' is
    handed through to the serializer unchanged.
    """
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    stream.write(text)
99155935 | 39 | |
26bb0f31 | 40 | |
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serialize 'obj' as JSON into the file called 'name'.

    The file is opened in text write mode and closed again when the write
    finishes.  'pretty' and 'sort_keys' are forwarded to to_stream().
    """
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
99155935 | 44 | |
26bb0f31 | 45 | |
def to_string(obj, pretty=False, sort_keys=True):
    """Return 'obj' serialized as a JSON string.

    When 'pretty' is true the result is indented SPACES_PER_LEVEL spaces per
    nesting level; otherwise no indentation is requested.  'sort_keys' is
    handed through to the serializer unchanged.
    """
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    return text
99155935 | 49 | |
26bb0f31 | 50 | |
99155935 BP |
def from_stream(stream):
    """Parse one JSON value read from 'stream' in 4096-unit chunks.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk (it does so once it has recorded an error).
    Returns whatever Parser.finish() returns: the parsed value on success,
    or an error-message string on failure.
    """
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Test for emptiness rather than equality with "": a binary stream
        # signals EOF with b"", which never compares equal to "" and would
        # otherwise keep this loop spinning forever.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()
58 | ||
26bb0f31 | 59 | |
99155935 BP |
def from_file(name):
    """Parse the JSON document stored in the file called 'name'.

    The file is opened in text read mode and is closed again whether or not
    parsing succeeds.  Returns the result of from_stream() on its contents.
    """
    with open(name, "r") as stream:
        return from_stream(stream)
66 | ||
26bb0f31 | 67 | |
def from_string(s):
    """Parse the JSON document contained in 's'.

    's' is normally a str.  Anything else is first decoded as UTF-8; if the
    decoding fails, a diagnostic string naming the offending bytes is
    returned instead of a parse result.

    Returns whatever Parser.finish() returns: the parsed value on success,
    or an error-message string on failure.
    """
    if not isinstance(s, str):
        # Not text yet (e.g. bytes): decode it before handing it to the
        # parser, which works on characters.
        try:
            s = str(s, 'utf-8')
        except UnicodeDecodeError as e:
            # e.object holds the raw bytes; going through bytearray yields
            # integer byte values, so no ord() call is needed (calling ord()
            # on the ints produced by iterating bytes raises TypeError).
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join("0x%2x" % b for b in bad if b >= 0x80)
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
82 | ||
26bb0f31 | 83 | |
class Parser(object):
    """Incremental JSON parser.

    Text is supplied piecewise through feed(); finish() then returns either
    the parsed value (a dict or list at the top level) or an error-message
    string.  Internally the parser is two state machines: a lexer, whose
    current state function is held in 'lex_state', and a parser proper,
    whose current state function is held in 'parse_state'.  Both hold plain
    functions that are called with the instance passed explicitly.

    If 'check_trailer' is false, the parser marks itself done as soon as the
    top-level value is complete.  If it is true, input after the top-level
    value is still examined and any further token is reported as an error.
    """

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        """Return the C extension's parser when it was importable, else a
        pure-Python Parser instance."""
        if PARSER == PARSER_C:
            return ovs._json.Parser(*args, **kwargs)
        return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Initialize lexer and parser state for a fresh document."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # ------------------------------------------------------------------
    # Lexer: actions taken on the first character of a token.
    # ------------------------------------------------------------------

    def __lex_start_space(self, c):
        """Skip white space between tokens."""
        pass

    def __lex_start_alpha(self, c):
        """Begin accumulating a keyword."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Pass a single-character punctuation token to the parser."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begin accumulating a number."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """Enter string state; the opening quote itself is not buffered."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Report a character that cannot start any token."""
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table from a token's first character to its start action.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens: dispatch on 'c'; always consumes it."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # ------------------------------------------------------------------
    # Lexer: keywords.
    # ------------------------------------------------------------------

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Turn the buffered keyword into a False/True/None token."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.

        Returns True if 'c' was consumed, or False if 'c' ended the keyword
        and must be re-examined as the start of the next token.
        """
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # ------------------------------------------------------------------
    # Lexer: numbers.
    # ------------------------------------------------------------------

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile("(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Convert the buffered number text into an int or float token.

        The value is delivered as an int when, after folding the fraction
        and exponent into the significand, it is an exact integer within
        the signed 64-bit range; otherwise it is delivered as a float.
        Malformed numbers are reported through __error().
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                    (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros carries no information.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            # The value is significand * 10**pow10.
            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to reduce pow10 to zero without leaving integer range.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                        ((not sign and significand < 2 ** 63) or
                         (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # Both 'e' and 'E' introduce an exponent (the lexer and
            # __number_re accept either), so diagnose both spellings.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.

        Returns True if 'c' was consumed, or False if 'c' ended the number
        and must be re-examined as the start of the next token.
        """
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    # ------------------------------------------------------------------
    # Lexer: strings.
    # ------------------------------------------------------------------

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Decode the four hex digits of a \\u escape.

        Returns the code unit as an int, or None (after recording an error)
        if 's' is too short, is not hexadecimal, or is "0000".
        """
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the characters they stand for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Unescape the buffered string body and pass it to the parser.

        The buffer holds the raw text between the quotes, backslashes
        included.  Escape errors are reported through __error() and no
        token is delivered.
        """
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed directly by a second
                # \u escape holding the trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += chr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state right after a backslash: buffer 'c' verbatim so that
        an escaped quote does not terminate the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string; always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Run the current lexer state on 'c'; returns whether 'c' was
        consumed."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # ------------------------------------------------------------------
    # Parser states.  Each takes the token and, for 'string' tokens, the
    # string's value.
    # ------------------------------------------------------------------

    def __parse_start(self, token, unused_string):
        """Expect the '{' or '[' that opens the top-level value."""
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        """Any token after the top-level value is an error."""
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        """Just after '{': expect '}' or the first member name."""
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        """Expect a member name."""
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        """Expect the ':' between a member name and its value."""
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        """Expect a member value."""
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        """After a member value: expect ',' or '}'."""
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        """Just after '[': expect ']' or the first element."""
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        """Expect an array element."""
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        """After an array element: expect ',' or ']'."""
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    # ------------------------------------------------------------------
    # Parser helpers.
    # ------------------------------------------------------------------

    def __parser_input(self, token, string=None):
        """Reset the lexer for the next token and hand 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Store 'value' in the container on top of the stack, under
        'member_name' for an object or appended for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Open the container 'new_json', attaching it to its parent if
        there is one, or report an error at the nesting limit."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        """Open a new object."""
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        """Open a new array."""
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Close the container on top of the stack.

        The outermost container stays on the stack so that finish() can
        return it; closing it ends the document.
        """
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handle a token in value position.

        Scalars are stored and the parser moves to 'next_state'.  A '{' or
        '[' opens a nested container, whose own states take over until it
        is closed, so 'next_state' is not applied in that case.
        """
        # Keyword tokens are False/True/None and number tokens are int or
        # float; bool is a subclass of int, so one isinstance covers both.
        if token is None or isinstance(token, (int, float)):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Record 'message' with the current position and stop parsing.
        Only the first error is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    # ------------------------------------------------------------------
    # Public interface.
    # ------------------------------------------------------------------

    def feed(self, s):
        """Feed the characters of 's' to the parser.

        Returns the number of characters consumed, which is less than
        len(s) if the parser became done (finished or failed) part-way.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character that ends a keyword or number is not consumed on
            # the first pass; it is examined again as the next token's start.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Return True once parsing has finished or failed."""
        return self.done

    def finish(self):
        """Signal end of input and return the outcome.

        Returns the parsed top-level value on success, or an error-message
        string if the input was malformed, empty, or incomplete.
        """
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a keyword or number that was still being accumulated.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error