]> git.proxmox.com Git - ovs.git/blame - python/ovs/json.py
debian: Remove obsolete reference to var/log/core.
[ovs.git] / python / ovs / json.py
CommitLineData
be44585c 1# Copyright (c) 2010, 2011 Nicira Networks
99155935
BP
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at:
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import re
16import StringIO
17import sys
18
19escapes = {ord('"'): u"\\\"",
20 ord("\\"): u"\\\\",
21 ord("\b"): u"\\b",
22 ord("\f"): u"\\f",
23 ord("\n"): u"\\n",
24 ord("\r"): u"\\r",
25 ord("\t"): u"\\t"}
26for i in range(32):
27 if i not in escapes:
28 escapes[i] = u"\\u%04x" % i
29
30def __dump_string(stream, s):
31 stream.write(u"\"")
32 for c in s:
33 x = ord(c)
34 escape = escapes.get(x)
35 if escape:
36 stream.write(escape)
37 else:
38 stream.write(c)
39 stream.write(u"\"")
40
41def to_stream(obj, stream, pretty=False, sort_keys=True):
42 if obj is None:
43 stream.write(u"null")
44 elif obj is False:
45 stream.write(u"false")
46 elif obj is True:
47 stream.write(u"true")
48 elif type(obj) in (int, long):
49 stream.write(u"%d" % obj)
50 elif type(obj) == float:
51 stream.write("%.15g" % obj)
52 elif type(obj) == unicode:
53 __dump_string(stream, obj)
54 elif type(obj) == str:
55 __dump_string(stream, unicode(obj))
56 elif type(obj) == dict:
57 stream.write(u"{")
58 if sort_keys:
59 items = sorted(obj.items())
60 else:
61 items = obj.iteritems()
62 i = 0
63 for key, value in items:
64 if i > 0:
65 stream.write(u",")
66 i += 1
67 __dump_string(stream, unicode(key))
68 stream.write(u":")
69 to_stream(value, stream, pretty, sort_keys)
70 stream.write(u"}")
71 elif type(obj) in (list, tuple):
72 stream.write(u"[")
73 i = 0
74 for value in obj:
75 if i > 0:
76 stream.write(u",")
77 i += 1
78 to_stream(value, stream, pretty, sort_keys)
79 stream.write(u"]")
80 else:
81 raise Error("can't serialize %s as JSON" % obj)
82
83def to_file(obj, name, pretty=False, sort_keys=True):
84 stream = open(name, "w")
85 try:
86 to_stream(obj, stream, pretty, sort_keys)
87 finally:
88 stream.close()
89
90def to_string(obj, pretty=False, sort_keys=True):
91 output = StringIO.StringIO()
92 to_stream(obj, output, pretty, sort_keys)
93 s = output.getvalue()
94 output.close()
95 return s
96
97def from_stream(stream):
98 p = Parser(check_trailer=True)
99 while True:
100 buf = stream.read(4096)
101 if buf == "" or p.feed(buf) != len(buf):
102 break
103 return p.finish()
104
105def from_file(name):
106 stream = open(name, "r")
107 try:
108 return from_stream(stream)
109 finally:
110 stream.close()
111
112def from_string(s):
113 try:
114 s = unicode(s, 'utf-8')
115 except UnicodeDecodeError, e:
116 seq = ' '.join(["0x%2x" % ord(c) for c in e.object[e.start:e.end]])
be44585c 117 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
99155935
BP
118 p = Parser(check_trailer=True)
119 p.feed(s)
120 return p.finish()
121
122class Parser(object):
123 ## Maximum height of parsing stack. ##
124 MAX_HEIGHT = 1000
125
126 def __init__(self, check_trailer=False):
127 self.check_trailer = check_trailer
128
129 # Lexical analysis.
130 self.lex_state = Parser.__lex_start
131 self.buffer = ""
132 self.line_number = 0
133 self.column_number = 0
134 self.byte_number = 0
135
136 # Parsing.
137 self.parse_state = Parser.__parse_start
138 self.stack = []
139 self.member_name = None
140
141 # Parse status.
142 self.done = False
143 self.error = None
144
145 def __lex_start_space(self, c):
146 pass
147 def __lex_start_alpha(self, c):
148 self.buffer = c
149 self.lex_state = Parser.__lex_keyword
150 def __lex_start_token(self, c):
151 self.__parser_input(c)
152 def __lex_start_number(self, c):
153 self.buffer = c
154 self.lex_state = Parser.__lex_number
155 def __lex_start_string(self, c):
156 self.lex_state = Parser.__lex_string
157 def __lex_start_error(self, c):
158 if ord(c) >= 32 and ord(c) < 128:
159 self.__error("invalid character '%s'" % c)
160 else:
161 self.__error("invalid character U+%04x" % ord(c))
162
163 __lex_start_actions = {}
164 for c in " \t\n\r":
165 __lex_start_actions[c] = __lex_start_space
166 for c in "abcdefghijklmnopqrstuvwxyz":
167 __lex_start_actions[c] = __lex_start_alpha
168 for c in "[{]}:,":
169 __lex_start_actions[c] = __lex_start_token
170 for c in "-0123456789":
171 __lex_start_actions[c] = __lex_start_number
172 __lex_start_actions['"'] = __lex_start_string
173 def __lex_start(self, c):
174 Parser.__lex_start_actions.get(
175 c, Parser.__lex_start_error)(self, c)
176 return True
177
178 __lex_alpha = {}
179 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
180 __lex_alpha[c] = True
181 def __lex_finish_keyword(self):
182 if self.buffer == "false":
183 self.__parser_input(False)
184 elif self.buffer == "true":
185 self.__parser_input(True)
186 elif self.buffer == "null":
187 self.__parser_input(None)
188 else:
189 self.__error("invalid keyword '%s'" % self.buffer)
190 def __lex_keyword(self, c):
191 if c in Parser.__lex_alpha:
192 self.buffer += c
193 return True
194 else:
195 self.__lex_finish_keyword()
196 return False
197
198 __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
199 def __lex_finish_number(self):
200 s = self.buffer
201 m = Parser.__number_re.match(s)
202 if m:
203 sign, integer, fraction, exp = m.groups()
204 if (exp is not None and
205 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
206 self.__error("exponent outside valid range")
207 return
208
209 if fraction is not None and len(fraction.lstrip('0')) == 0:
210 fraction = None
211
212 sig_string = integer
213 if fraction is not None:
214 sig_string += fraction
215 significand = int(sig_string)
216
217 pow10 = 0
218 if fraction is not None:
219 pow10 -= len(fraction)
220 if exp is not None:
221 pow10 += long(exp)
222
223 if significand == 0:
224 self.__parser_input(0)
225 return
226 elif significand <= 2**63:
227 while pow10 > 0 and significand <= 2*63:
228 significand *= 10
229 pow10 -= 1
230 while pow10 < 0 and significand % 10 == 0:
231 significand /= 10
232 pow10 += 1
233 if (pow10 == 0 and
234 ((not sign and significand < 2**63) or
235 (sign and significand <= 2**63))):
236 if sign:
237 self.__parser_input(-significand)
238 else:
239 self.__parser_input(significand)
240 return
241
242 value = float(s)
243 if value == float("inf") or value == float("-inf"):
244 self.__error("number outside valid range")
245 return
246 if value == 0:
247 # Suppress negative zero.
248 value = 0
249 self.__parser_input(value)
250 elif re.match("-?0[0-9]", s):
251 self.__error("leading zeros not allowed")
252 elif re.match("-([^0-9]|$)", s):
253 self.__error("'-' must be followed by digit")
254 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
255 self.__error("decimal point must be followed by digit")
256 elif re.search("e[-+]?([^0-9]|$)", s):
257 self.__error("exponent must contain at least one digit")
258 else:
259 self.__error("syntax error in number")
260
261 def __lex_number(self, c):
262 if c in ".0123456789eE-+":
263 self.buffer += c
264 return True
265 else:
266 self.__lex_finish_number()
267 return False
268
269 __4hex_re = re.compile("[0-9a-fA-F]{4}")
270 def __lex_4hex(self, s):
271 if len(s) < 4:
272 self.__error("quoted string ends within \\u escape")
273 elif not Parser.__4hex_re.match(s):
274 self.__error("malformed \\u escape")
275 elif s == "0000":
276 self.__error("null bytes not supported in quoted strings")
277 else:
278 return int(s, 16)
279 @staticmethod
280 def __is_leading_surrogate(c):
281 """Returns true if 'c' is a Unicode code point for a leading
282 surrogate."""
283 return c >= 0xd800 and c <= 0xdbff
284 @staticmethod
285 def __is_trailing_surrogate(c):
286 """Returns true if 'c' is a Unicode code point for a trailing
287 surrogate."""
288 return c >= 0xdc00 and c <= 0xdfff
289 @staticmethod
290 def __utf16_decode_surrogate_pair(leading, trailing):
291 """Returns the unicode code point corresponding to leading surrogate
292 'leading' and trailing surrogate 'trailing'. The return value will not
293 make any sense if 'leading' or 'trailing' are not in the correct ranges
294 for leading or trailing surrogates."""
295 # Leading surrogate: 110110wwwwxxxxxx
296 # Trailing surrogate: 110111xxxxxxxxxx
297 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
298 w = (leading >> 6) & 0xf
299 u = w + 1
300 x0 = leading & 0x3f
301 x1 = trailing & 0x3ff
302 return (u << 16) | (x0 << 10) | x1
303 __unescape = {'"': u'"',
304 "\\": u"\\",
305 "/": u"/",
306 "b": u"\b",
307 "f": u"\f",
308 "n": u"\n",
309 "r": u"\r",
310 "t": u"\t"}
311 def __lex_finish_string(self):
312 inp = self.buffer
313 out = u""
314 while len(inp):
315 backslash = inp.find('\\')
316 if backslash == -1:
317 out += inp
318 break
319 out += inp[:backslash]
320 inp = inp[backslash + 1:]
321 if inp == "":
322 self.__error("quoted string may not end with backslash")
323 return
324
325 replacement = Parser.__unescape.get(inp[0])
326 if replacement is not None:
327 out += replacement
328 inp = inp[1:]
329 continue
330 elif inp[0] != u'u':
331 self.__error("bad escape \\%s" % inp[0])
332 return
333
334 c0 = self.__lex_4hex(inp[1:5])
335 if c0 is None:
336 return
337 inp = inp[5:]
338
339 if Parser.__is_leading_surrogate(c0):
340 if inp[:2] != u'\\u':
341 self.__error("malformed escaped surrogate pair")
342 return
343 c1 = self.__lex_4hex(inp[2:6])
344 if c1 is None:
345 return
346 if not Parser.__is_trailing_surrogate(c1):
347 self.__error("second half of escaped surrogate pair is "
348 "not trailing surrogate")
349 return
350 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
351 inp = inp[6:]
352 else:
353 code_point = c0
354 out += unichr(code_point)
355 self.__parser_input('string', out)
356
357 def __lex_string_escape(self, c):
358 self.buffer += c
359 self.lex_state = Parser.__lex_string
360 return True
361 def __lex_string(self, c):
362 if c == '\\':
363 self.buffer += c
364 self.lex_state = Parser.__lex_string_escape
365 elif c == '"':
366 self.__lex_finish_string()
367 elif ord(c) >= 0x20:
368 self.buffer += c
369 else:
370 self.__error("U+%04X must be escaped in quoted string" % ord(c))
371 return True
372
373 def __lex_input(self, c):
374 self.byte_number += 1
375 if c == '\n':
376 self.column_number = 0
377 self.line_number += 1
378 else:
379 self.column_number += 1
380
381 eat = self.lex_state(self, c)
382 assert eat is True or eat is False
383 return eat
384
385 def __parse_start(self, token, string):
386 if token == '{':
387 self.__push_object()
388 elif token == '[':
389 self.__push_array()
390 else:
391 self.__error("syntax error at beginning of input")
392 def __parse_end(self, token, string):
393 self.__error("trailing garbage at end of input")
394 def __parse_object_init(self, token, string):
395 if token == '}':
396 self.__parser_pop()
397 else:
398 self.__parse_object_name(token, string)
399 def __parse_object_name(self, token, string):
400 if token == 'string':
401 self.member_name = string
402 self.parse_state = Parser.__parse_object_colon
403 else:
404 self.__error("syntax error parsing object expecting string")
405 def __parse_object_colon(self, token, string):
406 if token == ":":
407 self.parse_state = Parser.__parse_object_value
408 else:
409 self.__error("syntax error parsing object expecting ':'")
410 def __parse_object_value(self, token, string):
411 self.__parse_value(token, string, Parser.__parse_object_next)
412 def __parse_object_next(self, token, string):
413 if token == ",":
414 self.parse_state = Parser.__parse_object_name
415 elif token == "}":
416 self.__parser_pop()
417 else:
418 self.__error("syntax error expecting '}' or ','")
419 def __parse_array_init(self, token, string):
420 if token == ']':
421 self.__parser_pop()
422 else:
423 self.__parse_array_value(token, string)
424 def __parse_array_value(self, token, string):
425 self.__parse_value(token, string, Parser.__parse_array_next)
426 def __parse_array_next(self, token, string):
427 if token == ",":
428 self.parse_state = Parser.__parse_array_value
429 elif token == "]":
430 self.__parser_pop()
431 else:
432 self.__error("syntax error expecting ']' or ','")
433 def __parser_input(self, token, string=None):
434 self.lex_state = Parser.__lex_start
435 self.buffer = ""
436 #old_state = self.parse_state
437 self.parse_state(self, token, string)
438 #print ("token=%s string=%s old_state=%s new_state=%s"
439 # % (token, string, old_state, self.parse_state))
440
441 def __put_value(self, value):
442 top = self.stack[-1]
443 if type(top) == dict:
444 top[self.member_name] = value
445 else:
446 top.append(value)
447
448 def __parser_push(self, new_json, next_state):
449 if len(self.stack) < Parser.MAX_HEIGHT:
450 if len(self.stack) > 0:
451 self.__put_value(new_json)
452 self.stack.append(new_json)
453 self.parse_state = next_state
454 else:
455 self.__error("input exceeds maximum nesting depth %d" %
456 Parser.MAX_HEIGHT)
457 def __push_object(self):
458 self.__parser_push({}, Parser.__parse_object_init)
459 def __push_array(self):
460 self.__parser_push([], Parser.__parse_array_init)
461
462 def __parser_pop(self):
463 if len(self.stack) == 1:
464 self.parse_state = Parser.__parse_end
465 if not self.check_trailer:
466 self.done = True
467 else:
468 self.stack.pop()
469 top = self.stack[-1]
470 if type(top) == list:
471 self.parse_state = Parser.__parse_array_next
472 else:
473 self.parse_state = Parser.__parse_object_next
474
475 def __parse_value(self, token, string, next_state):
476 if token in [False, None, True] or type(token) in [int, long, float]:
477 self.__put_value(token)
478 elif token == 'string':
479 self.__put_value(string)
480 else:
481 if token == '{':
482 self.__push_object()
483 elif token == '[':
484 self.__push_array()
485 else:
486 self.__error("syntax error expecting value")
487 return
488 self.parse_state = next_state
489
490 def __error(self, message):
491 if self.error is None:
492 self.error = ("line %d, column %d, byte %d: %s"
493 % (self.line_number, self.column_number,
494 self.byte_number, message))
495 self.done = True
496
497 def feed(self, s):
498 i = 0
499 while True:
500 if self.done or i >= len(s):
501 return i
502 if self.__lex_input(s[i]):
503 i += 1
504
505 def is_done(self):
506 return self.done
507
508 def finish(self):
509 if self.lex_state == Parser.__lex_start:
510 pass
511 elif self.lex_state in (Parser.__lex_string,
512 Parser.__lex_string_escape):
513 self.__error("unexpected end of input in quoted string")
514 else:
515 self.__lex_input(" ")
516
517 if self.parse_state == Parser.__parse_start:
518 self.__error("empty input stream")
519 elif self.parse_state != Parser.__parse_end:
520 self.__error("unexpected end of input")
521
522 if self.error == None:
523 assert len(self.stack) == 1
524 return self.stack.pop()
525 else:
526 return self.error