]>
Commit | Line | Data |
---|---|---|
e0edde6f | 1 | # Copyright (c) 2010, 2011, 2012 Nicira, Inc. |
99155935 BP |
2 | # |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at: | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | ||
622749d8 | 15 | from __future__ import absolute_import |
6c7050b5 | 16 | |
622749d8 TW |
17 | import functools |
18 | import json | |
99155935 | 19 | import re |
99155935 BP |
20 | import sys |
21 | ||
cb96c1b2 | 22 | import six |
b3ac2947 | 23 | |
c63b04d6 TW |
24 | try: |
25 | import ovs._json | |
26 | except ImportError: | |
27 | pass | |
28 | ||
26bb0f31 EJ |
__pychecker__ = 'no-stringiter'

# Indentation width, in spaces, used when pretty-printing.
SPACES_PER_LEVEL = 2

# json.dumps() preconfigured with separators that contain no padding.
_dumper = functools.partial(json.dumps, separators=(",", ":"))

if not six.PY2:
    dumper = _dumper
else:
    def dumper(*args, **kwargs):
        """Serialize with _dumper, then decode the result with the
        'raw-unicode-escape' codec (Python 2 only)."""
        encoded = _dumper(*args, **kwargs)
        return encoded.decode('raw-unicode-escape')
cba64103 BP |
39 | |
40 | ||
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON text and writes it to 'stream'.

    'stream' must provide a write() method.  If 'pretty' is true the output
    is indented by SPACES_PER_LEVEL spaces per nesting level.  'sort_keys'
    is passed through to the serializer."""
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    stream.write(text)
99155935 | 44 | |
26bb0f31 | 45 | |
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file called 'name'.

    The file is opened in mode "w".  'pretty' and 'sort_keys' have the same
    meaning as for to_stream()."""
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
99155935 | 49 | |
26bb0f31 | 50 | |
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    If 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces
    per nesting level.  'sort_keys' is passed through to the serializer."""
    if pretty:
        indent = SPACES_PER_LEVEL
    else:
        indent = None
    return dumper(obj, indent=indent, sort_keys=sort_keys)
99155935 | 54 | |
26bb0f31 | 55 | |
99155935 BP |
def from_stream(stream):
    """Parses JSON read from 'stream' in 4096-character chunks.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk.  Returns whatever Parser.finish() returns: the
    parsed value on success, or a string describing the error."""
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Test for emptiness rather than equality with "", so that an
        # exhausted binary stream (which returns b"") also ends the loop
        # instead of spinning forever.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()
63 | ||
26bb0f31 | 64 | |
99155935 BP |
def from_file(name):
    """Parses the JSON contained in the file called 'name'.

    The file is opened in mode "r" and is closed again whether or not
    parsing succeeds.  Returns the result of from_stream()."""
    with open(name, "r") as stream:
        return from_stream(stream)
71 | ||
26bb0f31 | 72 | |
def from_string(s):
    """Parses the JSON text in 's'.

    's' may be a text string or a UTF-8 encoded byte string.  Returns the
    parsed value on success.  On failure returns a string describing the
    error; this includes the case where 's' is a byte string that is not
    valid UTF-8."""
    if not isinstance(s, six.text_type):
        # Not text yet: a Python 2 str or a Python 3 bytes object.  Decode
        # it before handing it to the parser.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Go through bytearray so that the offending bytes are integers
            # on both Python 2 (where indexing a str yields 1-character
            # strings) and Python 3 (where indexing bytes yields ints).
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join(["0x%2x" % b for b in bad if b >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
87 | ||
26bb0f31 | 88 | |
class Parser(object):
    """Incremental JSON parser.

    Input is supplied piecewise through feed().  finish() then returns the
    parsed top-level value (a dict or a list) or, if anything went wrong, a
    string describing the first error that was detected.

    Constructing a Parser first tries to build an ovs._json.Parser with the
    same arguments and returns that object instead; this pure-Python
    implementation is used only when that name cannot be resolved."""

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        # "ovs" is only bound if the optional "import ovs._json" succeeded;
        # otherwise the lookup raises NameError and we fall back to this
        # class.
        try:
            return ovs._json.Parser(*args, **kwargs)
        except NameError:
            return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Initializes lexer and parser state.

        If 'check_trailer' is false, parsing is marked done as soon as the
        top-level value is closed.  If it is true, input keeps being consumed
        afterwards and any further token is reported as trailing garbage."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Handlers for the first character of a token.  Each one is selected
    # through __lex_start_actions below.

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if 32 <= ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Maps the first character of a token to its handler.  Characters that
    # are absent from the table are handled by __lex_start_error.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string
    # Do not leave the loop variable behind as a class attribute.
    del c

    def __lex_start(self, c):
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

    def __lex_finish_keyword(self):
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            # 'c' ends the keyword and is not consumed; feed() presents it
            # again once the lexer is back in its start state.
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered number text into a token.

        A value that is exactly an integer with magnitude below 2**63 (or
        exactly -2**63) is passed on as an integer; any other well-formed
        number is passed on as a float.  Malformed text records an error."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                    (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction made only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Fold the power of ten into the significand as far as that
                # can be done exactly.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                        ((not sign and significand < 2 ** 63) or
                         (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # Both 'e' and 'E' introduce an exponent, as in __number_re.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            # 'c' ends the number and is not consumed here.
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the four hex digits in 's' as an integer.

        Records an error and returns None if 's' is too short, is not four
        hex digits, or is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the text each one stands for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Expands the escapes in the buffered string body and passes the
        result to the parser as a 'string' token.  Records an error instead
        if an escape is malformed."""
        inp = self.buffer
        out = u""
        while inp:
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed directly by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += six.unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        # The character after a backslash is buffered unconditionally, so an
        # escaped '"' does not end the string.
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state.  Returns True if 'c' was
        consumed, False if it must be presented again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer to its start state and passes 'token' (with
        'string' as its text when 'token' is 'string') to the current parser
        state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the container on top of the stack: under
        self.member_name for a dict, appended otherwise."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        if len(self.stack) < Parser.MAX_HEIGHT:
            # Anything but the top-level container is also a value inside
            # its parent.
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        if len(self.stack) == 1:
            # The top-level value is complete.  It stays on the stack so
            # that finish() can return it.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected, then
        moves to 'next_state' unless a nested container was opened or an
        error was recorded."""
        number_types = six.integer_types + (float,)
        if token in [False, None, True] or isinstance(token, number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position,
        unless an earlier error is already recorded, and marks parsing as
        done."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.

        Stops early once parsing is done.  Returns the number of characters
        of 's' that were consumed."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character the lexer declines to consume is presented again
            # on the next iteration, after the lexer has changed state.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns True once the parser will not consume further input."""
        return self.done

    def finish(self):
        """Signals end of input.

        Returns the parsed top-level value, or a string describing the error
        if the input was invalid or incomplete."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A keyword or number is still buffered; a space terminates it.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error