]> git.proxmox.com Git - ovs.git/blame - python/ovs/json.py
ovs.db.types: Always initialize ref_type attribute.
[ovs.git] / python / ovs / json.py
CommitLineData
be44585c 1# Copyright (c) 2010, 2011 Nicira Networks
99155935
BP
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at:
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import re
16import StringIO
17import sys
18
# Maps the code point of every character that must be escaped inside a JSON
# string to its escaped form.  The named short escapes are listed explicitly;
# every other control character below U+0020 gets the generic \uXXXX form.
escapes = {
    0x22: u'\\"',       # double quote
    0x5c: u"\\\\",      # backslash
    0x08: u"\\b",       # backspace
    0x0c: u"\\f",       # form feed
    0x0a: u"\\n",       # line feed
    0x0d: u"\\r",       # carriage return
    0x09: u"\\t",       # horizontal tab
}
for i in range(0x20):
    # setdefault() leaves the short escapes above untouched.
    escapes.setdefault(i, u"\\u%04x" % i)
29
def __dump_string(stream, s):
    """Writes 's' to 'stream' as a double-quoted JSON string.

    Each character whose code point has an entry in 'escapes' is replaced by
    that entry; every other character is written unchanged."""
    pieces = [escapes.get(ord(ch), ch) for ch in s]
    stream.write(u'"%s"' % ''.join(pieces))
99155935
BP
32
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream'.

    'obj' may be None, True, False, an int, long, float, unicode or str, or a
    dict, list or tuple whose contents are in turn serializable.  Types are
    matched exactly (subclasses are not accepted).  A str is converted with
    unicode() before being written, and dict keys are likewise converted with
    unicode().

    'stream' only needs a write() method.  Everything passed to it is a
    unicode string, so unicode-only streams can be used.

    'pretty' is accepted and passed along to nested calls, but this function
    does not otherwise use it.  If 'sort_keys' is true, the members of each
    object are written in sorted order; otherwise they are written in the
    dict's own iteration order.

    Raises Exception if 'obj' (or something nested in it) is of a type that
    cannot be serialized."""
    if obj is None:
        stream.write(u"null")
    elif obj is False:
        stream.write(u"false")
    elif obj is True:
        stream.write(u"true")
    elif type(obj) in (int, long):
        stream.write(u"%d" % obj)
    elif type(obj) == float:
        # Written as unicode like every other branch: a byte string here
        # would be rejected by streams that accept only unicode.
        stream.write(u"%.15g" % obj)
    elif type(obj) == unicode:
        __dump_string(stream, obj)
    elif type(obj) == str:
        __dump_string(stream, unicode(obj))
    elif type(obj) == dict:
        stream.write(u"{")
        if sort_keys:
            items = sorted(obj.items())
        else:
            items = obj.iteritems()
        for i, (key, value) in enumerate(items):
            if i > 0:
                stream.write(u",")
            __dump_string(stream, unicode(key))
            stream.write(u":")
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"}")
    elif type(obj) in (list, tuple):
        stream.write(u"[")
        for i, value in enumerate(obj):
            if i > 0:
                stream.write(u",")
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"]")
    else:
        raise Exception("can't serialize %s as JSON" % obj)
99155935
BP
70
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name'.

    The file is opened with mode "w" and is closed again whether or not
    serialization succeeds.  'pretty' and 'sort_keys' are handed to
    to_stream() unchanged."""
    out = open(name, "w")
    try:
        to_stream(obj, out, pretty, sort_keys)
    finally:
        out.close()
77
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    The text is produced by to_stream() writing into an in-memory buffer;
    'pretty' and 'sort_keys' are handed to it unchanged."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
84
def from_stream(stream):
    """Parses JSON read from 'stream' in 4096-character chunks.

    Reading stops at end of input (an empty read) or as soon as the parser
    declines to consume a whole chunk.  Returns whatever Parser.finish()
    returns: the parsed value, or an error message string."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "" and parser.feed(chunk) == len(chunk):
        chunk = stream.read(4096)
    return parser.finish()
92
def from_file(name):
    """Parses the JSON contained in the file named 'name'.

    The file is opened with mode "r" and is closed again whether or not
    parsing succeeds.  Returns the result of from_stream() on that file."""
    src = open(name, "r")
    try:
        return from_stream(src)
    finally:
        src.close()
99
def from_string(s):
    """Parses the JSON text in 's'.

    's' may be a UTF-8 encoded byte string or an already-decoded unicode
    string.  Returns the parsed value on success.  On failure, including
    when 's' is a byte string that is not valid UTF-8, returns a string
    describing the error instead."""
    if not isinstance(s, unicode):
        # Only byte strings need decoding: unicode(x, 'utf-8') raises
        # TypeError when 'x' is already unicode.
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError:
            # Fetched via sys.exc_info() so that no version-specific
            # "except ..., e" / "except ... as e" syntax is needed.
            e = sys.exc_info()[1]
            seq = ' '.join(["0x%2x" % ord(c)
                            for c in e.object[e.start:e.end]
                            if ord(c) >= 0x80])
            return ("not a valid UTF-8 string: invalid UTF-8 sequence %s"
                    % seq)
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
110
class Parser(object):
    """Incremental (push) parser for JSON text.

    Input is supplied in pieces of any size through feed().  After the last
    piece, finish() returns either the parsed value or, if anything went
    wrong, a string describing the first error.  The top-level value must be
    an object or an array.

    If 'check_trailer' is false, the parser reports itself done as soon as
    the top-level value has been closed.  If it is true, the parser keeps
    accepting input after that point and treats anything other than white
    space as an error.

    Internally the parser is two state machines.  The lexer state
    ('lex_state') consumes one character at a time and hands complete tokens
    to the parser state ('parse_state'), which builds the result on
    'stack'."""

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    # Python 2/3 portability.  On Python 2 these are exactly the objects this
    # class has always used.  On Python 3, where 'long', 'unichr' and
    # sys.maxint do not exist, the direct equivalents are substituted.
    try:
        __integer_types = (int, long)
        __unichr = unichr
        __max_int = sys.maxint
    except (NameError, AttributeError):
        __integer_types = (int,)
        __unichr = chr
        __max_int = sys.maxsize
    __number_types = __integer_types + (float,)

    def __init__(self, check_trailer=False):
        """Initializes an empty parser; see the class comment for the meaning
        of 'check_trailer'."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer, start state.  Each action below handles the first character of
    # a new token; the table that follows selects the action by character.
    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        # Punctuation is a complete token on its own.
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, c):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Always consumes 'c'."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Lexer, keywords.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Turns the buffered word into a false/true/null token, or reports
        an error if it is none of those."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Returns False, without consuming
        'c', when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Lexer, numbers.
    __number_re = re.compile(
        r"(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered text into a number token, or reports an
        error.

        A number whose value is an integer in the range -2**63...2**63-1 is
        delivered as an integer even if it was written with a fraction or
        an exponent (e.g. "200e1" or "1.0"); anything else is delivered as a
        float."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (int(exp) > Parser.__max_int or
                 int(exp) < -Parser.__max_int - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the magnitude as significand * 10**pow10.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2**63:
                # Try to normalize to pow10 == 0, which means that the value
                # is an integer.  Scaling up stops once the significand is
                # too large to fit in 64 bits anyway.
                while pow10 > 0 and significand <= 2**63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2**63) or
                     (sign and significand <= 2**63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            # Not representable as a 64-bit integer: fall back to a float.
            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("e[-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Returns False, without consuming
        'c', when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    # Lexer, strings.
    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the 4 hex digits in 's', the text following
        "\\u" in a string.  On failure, reports an error and returns None."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will
        not make any sense if 'leading' or 'trailing' are not in the correct
        ranges for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escape sequences in the buffered string body and
        delivers the result as a 'string' token, or reports an error."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            # \uXXXX escape: skip the 'u' and its four hex digits.
            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += Parser.__unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash in a string: buffers 'c'
        unconditionally, so that an escaped '"' does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state.  Returns True if 'c' was
        consumed, False if it must be passed in again.

        The position counters advance on every call, so a character that is
        passed in twice (because it terminated a number or keyword) is
        counted twice."""
        self.byte_number += 1
        if c == '\n':
            self.column_number = 0
            self.line_number += 1
        else:
            self.column_number += 1

        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes the token just lexed; 'string' is the
    # decoded text when 'token' is 'string' and None otherwise.
    def __parse_start(self, token, string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, token, string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Delivers a complete token to the current parser state and resets
        the lexer for the next token."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack, under
        'member_name' if that container is an object."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Starts a nested container 'new_json', linking it into its parent
        (if any), and moves to 'next_state'.  Reports an error instead if
        that would exceed MAX_HEIGHT levels of nesting."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Finishes the container on top of the stack.  The top-level
        container is left on the stack so that finish() can return it."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.  After a
        scalar value the parser moves to 'next_state'; a nested container
        selects its own state when it is pushed and again when popped."""
        if (token in [False, None, True] or
            type(token) in Parser.__number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and stops the parser.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.  Returns the number of
        characters consumed, which is less than len(s) if the parser finished
        (successfully or with an error) before the end of 's'."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i
            # A False return means that s[i] ended the previous token and
            # has to be presented again to start the next one.
            if self.__lex_input(s[i]):
                i += 1

    def is_done(self):
        """Returns True if the parser will not consume any more input."""
        return self.done

    def finish(self):
        """Signals end of input.  Returns the parsed value, or a string
        describing the error if parsing failed."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a number or keyword that runs up to the end of input.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error