]>
Commit | Line | Data |
---|---|---|
be44585c | 1 | # Copyright (c) 2010, 2011 Nicira Networks |
99155935 BP |
2 | # |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at: | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | ||
15 | import re | |
16 | import StringIO | |
17 | import sys | |
18 | ||
26bb0f31 EJ |
19 | __pychecker__ = 'no-stringiter' |
20 | ||
99155935 BP |
21 | escapes = {ord('"'): u"\\\"", |
22 | ord("\\"): u"\\\\", | |
23 | ord("\b"): u"\\b", | |
24 | ord("\f"): u"\\f", | |
25 | ord("\n"): u"\\n", | |
26 | ord("\r"): u"\\r", | |
27 | ord("\t"): u"\\t"} | |
9b46cccc BP |
28 | for esc in range(32): |
29 | if esc not in escapes: | |
30 | escapes[esc] = u"\\u%04x" % esc | |
99155935 | 31 | |
cba64103 | 32 | SPACES_PER_LEVEL = 2 |
99155935 | 33 | |
cba64103 BP |
34 | |
class _Serializer(object):
    """Writes Python values to a stream as JSON text.

    'stream' is any object with a write() method.  If 'pretty' is true,
    members and elements are placed on separate lines, indented by
    SPACES_PER_LEVEL spaces per nesting level.  If 'sort_keys' is true,
    object members are written in sorted order.
    """

    def __init__(self, stream, pretty, sort_keys):
        self.stream = stream
        self.pretty = pretty
        self.sort_keys = sort_keys
        # Current nesting level; only affects indentation in pretty mode.
        self.depth = 0

    def __serialize_string(self, s):
        """Writes 's' as a quoted JSON string, escaping characters listed in
        'escapes'."""
        self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))

    def __indent_line(self):
        """In pretty mode, starts a new line indented to the current depth;
        otherwise does nothing."""
        if self.pretty:
            self.stream.write('\n')
            self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))

    def __serialize_object(self, obj):
        """Writes the dict 'obj' as a JSON object.  Keys are converted to
        unicode before being written."""
        self.stream.write(u"{")

        self.depth += 1
        self.__indent_line()

        if self.sort_keys:
            items = sorted(obj.items())
        else:
            items = obj.iteritems()
        for i, (key, value) in enumerate(items):
            if i > 0:
                self.stream.write(u",")
                self.__indent_line()
            self.__serialize_string(unicode(key))
            self.stream.write(u":")
            if self.pretty:
                self.stream.write(u' ')
            self.serialize(value)

        self.stream.write(u"}")
        self.depth -= 1

    def __serialize_array(self, obj):
        """Writes the list or tuple 'obj' as a JSON array."""
        self.stream.write(u"[")
        self.depth += 1

        if obj:
            self.__indent_line()

        for i, value in enumerate(obj):
            if i > 0:
                self.stream.write(u",")
                self.__indent_line()
            self.serialize(value)

        self.depth -= 1
        self.stream.write(u"]")

    def serialize(self, obj):
        """Writes 'obj' to the stream as JSON.

        Supported values are None, booleans, int, long, float, str, unicode,
        dict, list and tuple, including instances of their subclasses.
        Raises Exception for anything else.
        """
        # The boolean singletons are tested by identity first, because bool
        # is a subclass of int and would otherwise be written as a number.
        if obj is None:
            self.stream.write(u"null")
        elif obj is False:
            self.stream.write(u"false")
        elif obj is True:
            self.stream.write(u"true")
        elif isinstance(obj, (int, long)):
            self.stream.write(u"%d" % obj)
        elif isinstance(obj, float):
            self.stream.write("%.15g" % obj)
        elif isinstance(obj, unicode):
            self.__serialize_string(obj)
        elif isinstance(obj, str):
            self.__serialize_string(unicode(obj))
        elif isinstance(obj, dict):
            self.__serialize_object(obj)
        elif isinstance(obj, (list, tuple)):
            self.__serialize_array(obj)
        else:
            raise Exception("can't serialize %s as JSON" % obj)
104 | ||
105 | ||
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream'.

    'pretty' and 'sort_keys' are passed through to the serializer."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
99155935 | 108 | |
26bb0f31 | 109 | |
99155935 BP |
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name', which is opened
    for writing.  The file is closed even if serialization fails."""
    out = open(name, "w")
    try:
        to_stream(obj, out, pretty, sort_keys)
    finally:
        out.close()
116 | ||
26bb0f31 | 117 | |
99155935 BP |
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
124 | ||
26bb0f31 | 125 | |
99155935 BP |
def from_stream(stream):
    """Parses JSON read from 'stream' in 4096-character chunks.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk.  Returns whatever Parser.finish() returns."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "":
        if parser.feed(chunk) != len(chunk):
            break
        chunk = stream.read(4096)
    return parser.finish()
133 | ||
26bb0f31 | 134 | |
99155935 BP |
def from_file(name):
    """Parses the JSON in the file named 'name' and returns the result of
    from_stream().  The file is closed even if parsing raises."""
    source = open(name, "r")
    try:
        result = from_stream(source)
    finally:
        source.close()
    return result
141 | ||
26bb0f31 | 142 | |
99155935 BP |
def from_string(s):
    """Parses the JSON text in 's'.

    's' may be a byte string, which is decoded as UTF-8, or a unicode string,
    which is parsed as is.  Returns the result of Parser.finish(), or an
    error message string if 's' is a byte string that is not valid UTF-8.
    """
    # unicode(x, 'utf-8') raises TypeError when 'x' is already unicode, so
    # only byte strings are decoded.
    if not isinstance(s, unicode):
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError:
            # Fetch the exception through sys.exc_info() so that the handler
            # does not depend on a particular "except" binding syntax.
            e = sys.exc_info()[1]
            seq = ' '.join(["0x%2x" % ord(c)
                            for c in e.object[e.start:e.end]
                            if ord(c) >= 0x80])
            return ("not a valid UTF-8 string: invalid UTF-8 sequence %s"
                    % seq)
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
153 | ||
26bb0f31 | 154 | |
99155935 BP |
class Parser(object):
    """Incremental JSON parser.

    Supply text with feed(), in as many pieces as convenient, then call
    finish(), which returns the parsed value, or an error message string if
    the input was not acceptable.  The top-level value must be an object or
    an array.

    If 'check_trailer' is false, the parser reports itself done as soon as
    the top-level value has been closed.  If it is true, the parser keeps
    consuming input and treats any further token as an error.
    """

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer: actions for the first character of a token.

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        # Punctuation is a complete token by itself.
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        # Printable ASCII is quoted literally; anything else is reported by
        # code point.
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table from a token's first character to its start action.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Always consumes 'c'."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Passes the keyword accumulated in the buffer to the parser, or
        reports an error if it is not one of false, true or null."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile("(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number accumulated in the buffer and passes it to the
        parser.

        A number whose value is an integer in the range -2**63...2**63-1 is
        passed as an integer; any other valid number is passed as a float.
        Malformed or out-of-range numbers are reported as errors.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            # The value is significand * 10**pow10.
            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += long(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to reduce pow10 to zero, which means the value is an
                # exact integer.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Floor division keeps the significand an integer
                    # regardless of how "/" is defined for integers.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # Both exponent markers accepted by the number grammar are
            # checked, so "1E" gets the same diagnostic as "1e".
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the four hex digits in 's', or reports an
        error and returns None if 's' is not acceptable."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the characters they stand for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the string body accumulated in the buffer
        and passes the result to the parser as a 'string' token."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash inside a string: 'c' is kept
        verbatim so that an escaped quote does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Updates the position counters for 'c' and runs the current lexer
        state on it.  Returns True if 'c' was consumed, False if it must be
        presented again."""
        self.byte_number += 1
        if c == '\n':
            self.column_number = 0
            self.line_number += 1
        else:
            self.column_number += 1

        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes the token and, for 'string' tokens, the
    # string's value.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and hands 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack: under
        'member_name' for an object, appended for an array."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Enters the new container 'new_json', attaching it to its parent
        if there is one, unless that would exceed MAX_HEIGHT."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Leaves the container on top of the stack.  The top-level
        container stays on the stack so that finish() can return it."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.

        A scalar is stored in the enclosing container and the parser moves
        to 'next_state'.  Opening a nested container sets the state itself,
        so 'next_state' is not applied in that case.
        """
        if token == '{':
            self.__push_object()
            return
        if token == '[':
            self.__push_array()
            return

        if token == 'string':
            value = string
        elif token in [False, None, True] or type(token) in [int, long, float]:
            value = token
        else:
            self.__error("syntax error expecting value")
            return
        self.__put_value(value)
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current position, as the
        parse error and marks the parser done.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.

        Returns the number of characters consumed, which is less than
        len(s) if the parser finished, successfully or with an error, before
        the end of 's'.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i
            # A character that ends a keyword or number is not consumed by
            # that state; it is presented again to the next one.
            if self.__lex_input(s[i]):
                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input."""
        return self.done

    def finish(self):
        """Signals end of input.

        Returns the parsed top-level value, or an error message string if
        the input was invalid or incomplete.
        """
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending keyword or number by feeding a delimiter.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error