]>
Commit | Line | Data |
---|---|---|
be44585c | 1 | # Copyright (c) 2010, 2011 Nicira Networks |
99155935 BP |
2 | # |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at: | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | ||
15 | import re | |
16 | import StringIO | |
17 | import sys | |
18 | ||
19 | escapes = {ord('"'): u"\\\"", | |
20 | ord("\\"): u"\\\\", | |
21 | ord("\b"): u"\\b", | |
22 | ord("\f"): u"\\f", | |
23 | ord("\n"): u"\\n", | |
24 | ord("\r"): u"\\r", | |
25 | ord("\t"): u"\\t"} | |
26 | for i in range(32): | |
27 | if i not in escapes: | |
28 | escapes[i] = u"\\u%04x" % i | |
29 | ||
30 | def __dump_string(stream, s): | |
31 | stream.write(u"\"") | |
32 | for c in s: | |
33 | x = ord(c) | |
34 | escape = escapes.get(x) | |
35 | if escape: | |
36 | stream.write(escape) | |
37 | else: | |
38 | stream.write(c) | |
39 | stream.write(u"\"") | |
40 | ||
41 | def to_stream(obj, stream, pretty=False, sort_keys=True): | |
42 | if obj is None: | |
43 | stream.write(u"null") | |
44 | elif obj is False: | |
45 | stream.write(u"false") | |
46 | elif obj is True: | |
47 | stream.write(u"true") | |
48 | elif type(obj) in (int, long): | |
49 | stream.write(u"%d" % obj) | |
50 | elif type(obj) == float: | |
51 | stream.write("%.15g" % obj) | |
52 | elif type(obj) == unicode: | |
53 | __dump_string(stream, obj) | |
54 | elif type(obj) == str: | |
55 | __dump_string(stream, unicode(obj)) | |
56 | elif type(obj) == dict: | |
57 | stream.write(u"{") | |
58 | if sort_keys: | |
59 | items = sorted(obj.items()) | |
60 | else: | |
61 | items = obj.iteritems() | |
62 | i = 0 | |
63 | for key, value in items: | |
64 | if i > 0: | |
65 | stream.write(u",") | |
66 | i += 1 | |
67 | __dump_string(stream, unicode(key)) | |
68 | stream.write(u":") | |
69 | to_stream(value, stream, pretty, sort_keys) | |
70 | stream.write(u"}") | |
71 | elif type(obj) in (list, tuple): | |
72 | stream.write(u"[") | |
73 | i = 0 | |
74 | for value in obj: | |
75 | if i > 0: | |
76 | stream.write(u",") | |
77 | i += 1 | |
78 | to_stream(value, stream, pretty, sort_keys) | |
79 | stream.write(u"]") | |
80 | else: | |
81 | raise Error("can't serialize %s as JSON" % obj) | |
82 | ||
83 | def to_file(obj, name, pretty=False, sort_keys=True): | |
84 | stream = open(name, "w") | |
85 | try: | |
86 | to_stream(obj, stream, pretty, sort_keys) | |
87 | finally: | |
88 | stream.close() | |
89 | ||
90 | def to_string(obj, pretty=False, sort_keys=True): | |
91 | output = StringIO.StringIO() | |
92 | to_stream(obj, output, pretty, sort_keys) | |
93 | s = output.getvalue() | |
94 | output.close() | |
95 | return s | |
96 | ||
97 | def from_stream(stream): | |
98 | p = Parser(check_trailer=True) | |
99 | while True: | |
100 | buf = stream.read(4096) | |
101 | if buf == "" or p.feed(buf) != len(buf): | |
102 | break | |
103 | return p.finish() | |
104 | ||
105 | def from_file(name): | |
106 | stream = open(name, "r") | |
107 | try: | |
108 | return from_stream(stream) | |
109 | finally: | |
110 | stream.close() | |
111 | ||
112 | def from_string(s): | |
113 | try: | |
114 | s = unicode(s, 'utf-8') | |
115 | except UnicodeDecodeError, e: | |
116 | seq = ' '.join(["0x%2x" % ord(c) for c in e.object[e.start:e.end]]) | |
be44585c | 117 | return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq) |
99155935 BP |
118 | p = Parser(check_trailer=True) |
119 | p.feed(s) | |
120 | return p.finish() | |
121 | ||
122 | class Parser(object): | |
123 | ## Maximum height of parsing stack. ## | |
124 | MAX_HEIGHT = 1000 | |
125 | ||
126 | def __init__(self, check_trailer=False): | |
127 | self.check_trailer = check_trailer | |
128 | ||
129 | # Lexical analysis. | |
130 | self.lex_state = Parser.__lex_start | |
131 | self.buffer = "" | |
132 | self.line_number = 0 | |
133 | self.column_number = 0 | |
134 | self.byte_number = 0 | |
135 | ||
136 | # Parsing. | |
137 | self.parse_state = Parser.__parse_start | |
138 | self.stack = [] | |
139 | self.member_name = None | |
140 | ||
141 | # Parse status. | |
142 | self.done = False | |
143 | self.error = None | |
144 | ||
145 | def __lex_start_space(self, c): | |
146 | pass | |
147 | def __lex_start_alpha(self, c): | |
148 | self.buffer = c | |
149 | self.lex_state = Parser.__lex_keyword | |
150 | def __lex_start_token(self, c): | |
151 | self.__parser_input(c) | |
152 | def __lex_start_number(self, c): | |
153 | self.buffer = c | |
154 | self.lex_state = Parser.__lex_number | |
155 | def __lex_start_string(self, c): | |
156 | self.lex_state = Parser.__lex_string | |
157 | def __lex_start_error(self, c): | |
158 | if ord(c) >= 32 and ord(c) < 128: | |
159 | self.__error("invalid character '%s'" % c) | |
160 | else: | |
161 | self.__error("invalid character U+%04x" % ord(c)) | |
162 | ||
163 | __lex_start_actions = {} | |
164 | for c in " \t\n\r": | |
165 | __lex_start_actions[c] = __lex_start_space | |
166 | for c in "abcdefghijklmnopqrstuvwxyz": | |
167 | __lex_start_actions[c] = __lex_start_alpha | |
168 | for c in "[{]}:,": | |
169 | __lex_start_actions[c] = __lex_start_token | |
170 | for c in "-0123456789": | |
171 | __lex_start_actions[c] = __lex_start_number | |
172 | __lex_start_actions['"'] = __lex_start_string | |
173 | def __lex_start(self, c): | |
174 | Parser.__lex_start_actions.get( | |
175 | c, Parser.__lex_start_error)(self, c) | |
176 | return True | |
177 | ||
178 | __lex_alpha = {} | |
179 | for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
180 | __lex_alpha[c] = True | |
181 | def __lex_finish_keyword(self): | |
182 | if self.buffer == "false": | |
183 | self.__parser_input(False) | |
184 | elif self.buffer == "true": | |
185 | self.__parser_input(True) | |
186 | elif self.buffer == "null": | |
187 | self.__parser_input(None) | |
188 | else: | |
189 | self.__error("invalid keyword '%s'" % self.buffer) | |
190 | def __lex_keyword(self, c): | |
191 | if c in Parser.__lex_alpha: | |
192 | self.buffer += c | |
193 | return True | |
194 | else: | |
195 | self.__lex_finish_keyword() | |
196 | return False | |
197 | ||
198 | __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$") | |
199 | def __lex_finish_number(self): | |
200 | s = self.buffer | |
201 | m = Parser.__number_re.match(s) | |
202 | if m: | |
203 | sign, integer, fraction, exp = m.groups() | |
204 | if (exp is not None and | |
205 | (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)): | |
206 | self.__error("exponent outside valid range") | |
207 | return | |
208 | ||
209 | if fraction is not None and len(fraction.lstrip('0')) == 0: | |
210 | fraction = None | |
211 | ||
212 | sig_string = integer | |
213 | if fraction is not None: | |
214 | sig_string += fraction | |
215 | significand = int(sig_string) | |
216 | ||
217 | pow10 = 0 | |
218 | if fraction is not None: | |
219 | pow10 -= len(fraction) | |
220 | if exp is not None: | |
221 | pow10 += long(exp) | |
222 | ||
223 | if significand == 0: | |
224 | self.__parser_input(0) | |
225 | return | |
226 | elif significand <= 2**63: | |
227 | while pow10 > 0 and significand <= 2*63: | |
228 | significand *= 10 | |
229 | pow10 -= 1 | |
230 | while pow10 < 0 and significand % 10 == 0: | |
231 | significand /= 10 | |
232 | pow10 += 1 | |
233 | if (pow10 == 0 and | |
234 | ((not sign and significand < 2**63) or | |
235 | (sign and significand <= 2**63))): | |
236 | if sign: | |
237 | self.__parser_input(-significand) | |
238 | else: | |
239 | self.__parser_input(significand) | |
240 | return | |
241 | ||
242 | value = float(s) | |
243 | if value == float("inf") or value == float("-inf"): | |
244 | self.__error("number outside valid range") | |
245 | return | |
246 | if value == 0: | |
247 | # Suppress negative zero. | |
248 | value = 0 | |
249 | self.__parser_input(value) | |
250 | elif re.match("-?0[0-9]", s): | |
251 | self.__error("leading zeros not allowed") | |
252 | elif re.match("-([^0-9]|$)", s): | |
253 | self.__error("'-' must be followed by digit") | |
254 | elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s): | |
255 | self.__error("decimal point must be followed by digit") | |
256 | elif re.search("e[-+]?([^0-9]|$)", s): | |
257 | self.__error("exponent must contain at least one digit") | |
258 | else: | |
259 | self.__error("syntax error in number") | |
260 | ||
261 | def __lex_number(self, c): | |
262 | if c in ".0123456789eE-+": | |
263 | self.buffer += c | |
264 | return True | |
265 | else: | |
266 | self.__lex_finish_number() | |
267 | return False | |
268 | ||
269 | __4hex_re = re.compile("[0-9a-fA-F]{4}") | |
270 | def __lex_4hex(self, s): | |
271 | if len(s) < 4: | |
272 | self.__error("quoted string ends within \\u escape") | |
273 | elif not Parser.__4hex_re.match(s): | |
274 | self.__error("malformed \\u escape") | |
275 | elif s == "0000": | |
276 | self.__error("null bytes not supported in quoted strings") | |
277 | else: | |
278 | return int(s, 16) | |
279 | @staticmethod | |
280 | def __is_leading_surrogate(c): | |
281 | """Returns true if 'c' is a Unicode code point for a leading | |
282 | surrogate.""" | |
283 | return c >= 0xd800 and c <= 0xdbff | |
284 | @staticmethod | |
285 | def __is_trailing_surrogate(c): | |
286 | """Returns true if 'c' is a Unicode code point for a trailing | |
287 | surrogate.""" | |
288 | return c >= 0xdc00 and c <= 0xdfff | |
289 | @staticmethod | |
290 | def __utf16_decode_surrogate_pair(leading, trailing): | |
291 | """Returns the unicode code point corresponding to leading surrogate | |
292 | 'leading' and trailing surrogate 'trailing'. The return value will not | |
293 | make any sense if 'leading' or 'trailing' are not in the correct ranges | |
294 | for leading or trailing surrogates.""" | |
295 | # Leading surrogate: 110110wwwwxxxxxx | |
296 | # Trailing surrogate: 110111xxxxxxxxxx | |
297 | # Code point: 000uuuuuxxxxxxxxxxxxxxxx | |
298 | w = (leading >> 6) & 0xf | |
299 | u = w + 1 | |
300 | x0 = leading & 0x3f | |
301 | x1 = trailing & 0x3ff | |
302 | return (u << 16) | (x0 << 10) | x1 | |
303 | __unescape = {'"': u'"', | |
304 | "\\": u"\\", | |
305 | "/": u"/", | |
306 | "b": u"\b", | |
307 | "f": u"\f", | |
308 | "n": u"\n", | |
309 | "r": u"\r", | |
310 | "t": u"\t"} | |
311 | def __lex_finish_string(self): | |
312 | inp = self.buffer | |
313 | out = u"" | |
314 | while len(inp): | |
315 | backslash = inp.find('\\') | |
316 | if backslash == -1: | |
317 | out += inp | |
318 | break | |
319 | out += inp[:backslash] | |
320 | inp = inp[backslash + 1:] | |
321 | if inp == "": | |
322 | self.__error("quoted string may not end with backslash") | |
323 | return | |
324 | ||
325 | replacement = Parser.__unescape.get(inp[0]) | |
326 | if replacement is not None: | |
327 | out += replacement | |
328 | inp = inp[1:] | |
329 | continue | |
330 | elif inp[0] != u'u': | |
331 | self.__error("bad escape \\%s" % inp[0]) | |
332 | return | |
333 | ||
334 | c0 = self.__lex_4hex(inp[1:5]) | |
335 | if c0 is None: | |
336 | return | |
337 | inp = inp[5:] | |
338 | ||
339 | if Parser.__is_leading_surrogate(c0): | |
340 | if inp[:2] != u'\\u': | |
341 | self.__error("malformed escaped surrogate pair") | |
342 | return | |
343 | c1 = self.__lex_4hex(inp[2:6]) | |
344 | if c1 is None: | |
345 | return | |
346 | if not Parser.__is_trailing_surrogate(c1): | |
347 | self.__error("second half of escaped surrogate pair is " | |
348 | "not trailing surrogate") | |
349 | return | |
350 | code_point = Parser.__utf16_decode_surrogate_pair(c0, c1) | |
351 | inp = inp[6:] | |
352 | else: | |
353 | code_point = c0 | |
354 | out += unichr(code_point) | |
355 | self.__parser_input('string', out) | |
356 | ||
357 | def __lex_string_escape(self, c): | |
358 | self.buffer += c | |
359 | self.lex_state = Parser.__lex_string | |
360 | return True | |
361 | def __lex_string(self, c): | |
362 | if c == '\\': | |
363 | self.buffer += c | |
364 | self.lex_state = Parser.__lex_string_escape | |
365 | elif c == '"': | |
366 | self.__lex_finish_string() | |
367 | elif ord(c) >= 0x20: | |
368 | self.buffer += c | |
369 | else: | |
370 | self.__error("U+%04X must be escaped in quoted string" % ord(c)) | |
371 | return True | |
372 | ||
373 | def __lex_input(self, c): | |
374 | self.byte_number += 1 | |
375 | if c == '\n': | |
376 | self.column_number = 0 | |
377 | self.line_number += 1 | |
378 | else: | |
379 | self.column_number += 1 | |
380 | ||
381 | eat = self.lex_state(self, c) | |
382 | assert eat is True or eat is False | |
383 | return eat | |
384 | ||
385 | def __parse_start(self, token, string): | |
386 | if token == '{': | |
387 | self.__push_object() | |
388 | elif token == '[': | |
389 | self.__push_array() | |
390 | else: | |
391 | self.__error("syntax error at beginning of input") | |
392 | def __parse_end(self, token, string): | |
393 | self.__error("trailing garbage at end of input") | |
394 | def __parse_object_init(self, token, string): | |
395 | if token == '}': | |
396 | self.__parser_pop() | |
397 | else: | |
398 | self.__parse_object_name(token, string) | |
399 | def __parse_object_name(self, token, string): | |
400 | if token == 'string': | |
401 | self.member_name = string | |
402 | self.parse_state = Parser.__parse_object_colon | |
403 | else: | |
404 | self.__error("syntax error parsing object expecting string") | |
405 | def __parse_object_colon(self, token, string): | |
406 | if token == ":": | |
407 | self.parse_state = Parser.__parse_object_value | |
408 | else: | |
409 | self.__error("syntax error parsing object expecting ':'") | |
410 | def __parse_object_value(self, token, string): | |
411 | self.__parse_value(token, string, Parser.__parse_object_next) | |
412 | def __parse_object_next(self, token, string): | |
413 | if token == ",": | |
414 | self.parse_state = Parser.__parse_object_name | |
415 | elif token == "}": | |
416 | self.__parser_pop() | |
417 | else: | |
418 | self.__error("syntax error expecting '}' or ','") | |
419 | def __parse_array_init(self, token, string): | |
420 | if token == ']': | |
421 | self.__parser_pop() | |
422 | else: | |
423 | self.__parse_array_value(token, string) | |
424 | def __parse_array_value(self, token, string): | |
425 | self.__parse_value(token, string, Parser.__parse_array_next) | |
426 | def __parse_array_next(self, token, string): | |
427 | if token == ",": | |
428 | self.parse_state = Parser.__parse_array_value | |
429 | elif token == "]": | |
430 | self.__parser_pop() | |
431 | else: | |
432 | self.__error("syntax error expecting ']' or ','") | |
433 | def __parser_input(self, token, string=None): | |
434 | self.lex_state = Parser.__lex_start | |
435 | self.buffer = "" | |
436 | #old_state = self.parse_state | |
437 | self.parse_state(self, token, string) | |
438 | #print ("token=%s string=%s old_state=%s new_state=%s" | |
439 | # % (token, string, old_state, self.parse_state)) | |
440 | ||
441 | def __put_value(self, value): | |
442 | top = self.stack[-1] | |
443 | if type(top) == dict: | |
444 | top[self.member_name] = value | |
445 | else: | |
446 | top.append(value) | |
447 | ||
448 | def __parser_push(self, new_json, next_state): | |
449 | if len(self.stack) < Parser.MAX_HEIGHT: | |
450 | if len(self.stack) > 0: | |
451 | self.__put_value(new_json) | |
452 | self.stack.append(new_json) | |
453 | self.parse_state = next_state | |
454 | else: | |
455 | self.__error("input exceeds maximum nesting depth %d" % | |
456 | Parser.MAX_HEIGHT) | |
457 | def __push_object(self): | |
458 | self.__parser_push({}, Parser.__parse_object_init) | |
459 | def __push_array(self): | |
460 | self.__parser_push([], Parser.__parse_array_init) | |
461 | ||
462 | def __parser_pop(self): | |
463 | if len(self.stack) == 1: | |
464 | self.parse_state = Parser.__parse_end | |
465 | if not self.check_trailer: | |
466 | self.done = True | |
467 | else: | |
468 | self.stack.pop() | |
469 | top = self.stack[-1] | |
470 | if type(top) == list: | |
471 | self.parse_state = Parser.__parse_array_next | |
472 | else: | |
473 | self.parse_state = Parser.__parse_object_next | |
474 | ||
475 | def __parse_value(self, token, string, next_state): | |
476 | if token in [False, None, True] or type(token) in [int, long, float]: | |
477 | self.__put_value(token) | |
478 | elif token == 'string': | |
479 | self.__put_value(string) | |
480 | else: | |
481 | if token == '{': | |
482 | self.__push_object() | |
483 | elif token == '[': | |
484 | self.__push_array() | |
485 | else: | |
486 | self.__error("syntax error expecting value") | |
487 | return | |
488 | self.parse_state = next_state | |
489 | ||
490 | def __error(self, message): | |
491 | if self.error is None: | |
492 | self.error = ("line %d, column %d, byte %d: %s" | |
493 | % (self.line_number, self.column_number, | |
494 | self.byte_number, message)) | |
495 | self.done = True | |
496 | ||
497 | def feed(self, s): | |
498 | i = 0 | |
499 | while True: | |
500 | if self.done or i >= len(s): | |
501 | return i | |
502 | if self.__lex_input(s[i]): | |
503 | i += 1 | |
504 | ||
505 | def is_done(self): | |
506 | return self.done | |
507 | ||
508 | def finish(self): | |
509 | if self.lex_state == Parser.__lex_start: | |
510 | pass | |
511 | elif self.lex_state in (Parser.__lex_string, | |
512 | Parser.__lex_string_escape): | |
513 | self.__error("unexpected end of input in quoted string") | |
514 | else: | |
515 | self.__lex_input(" ") | |
516 | ||
517 | if self.parse_state == Parser.__parse_start: | |
518 | self.__error("empty input stream") | |
519 | elif self.parse_state != Parser.__parse_end: | |
520 | self.__error("unexpected end of input") | |
521 | ||
522 | if self.error == None: | |
523 | assert len(self.stack) == 1 | |
524 | return self.stack.pop() | |
525 | else: | |
526 | return self.error |