]> git.proxmox.com Git - mirror_ovs.git/blame - python/ovs/json.py
python: Don't use StringIO directly.
[mirror_ovs.git] / python / ovs / json.py
CommitLineData
e0edde6f 1# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
99155935
BP
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at:
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import re
99155935
BP
16import sys
17
cb96c1b2 18import six
b3ac2947
RB
19from six.moves import range
20
26bb0f31
EJ
21__pychecker__ = 'no-stringiter'
22
99155935
BP
# Maps the code point of each character that must be escaped inside a JSON
# string to the escape sequence written in its place.
escapes = {
    ord('"'): u"\\\"",
    ord("\\"): u"\\\\",
    ord("\b"): u"\\b",
    ord("\f"): u"\\f",
    ord("\n"): u"\\n",
    ord("\r"): u"\\r",
    ord("\t"): u"\\t",
}
# Every other control character (code points 0 through 31) falls back to the
# generic \uXXXX form; entries with a short escape above are left alone.
for esc in range(32):
    escapes.setdefault(esc, u"\\u%04x" % esc)

# Number of spaces of indentation per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2
99155935 35
cba64103
BP
36
37class _Serializer(object):
38 def __init__(self, stream, pretty, sort_keys):
39 self.stream = stream
40 self.pretty = pretty
41 self.sort_keys = sort_keys
42 self.depth = 0
43
44 def __serialize_string(self, s):
45 self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
46
47 def __indent_line(self):
48 if self.pretty:
49 self.stream.write('\n')
50 self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
51
52 def serialize(self, obj):
53 if obj is None:
54 self.stream.write(u"null")
55 elif obj is False:
56 self.stream.write(u"false")
57 elif obj is True:
58 self.stream.write(u"true")
8f808842 59 elif isinstance(obj, six.integer_types):
cba64103 60 self.stream.write(u"%d" % obj)
8f808842 61 elif isinstance(obj, float):
cba64103 62 self.stream.write("%.15g" % obj)
25f599fb
RB
63 elif isinstance(obj, six.text_type):
64 # unicode() on Python 2, or str() in Python 3 (always unicode)
cba64103 65 self.__serialize_string(obj)
8f808842 66 elif isinstance(obj, str):
25f599fb
RB
67 # This is for Python 2, where this comes out to unicode(str()).
68 # For Python 3, it's str(str()), but it's harmless.
69 self.__serialize_string(six.text_type(obj))
8f808842 70 elif isinstance(obj, dict):
cba64103
BP
71 self.stream.write(u"{")
72
73 self.depth += 1
74 self.__indent_line()
75
76 if self.sort_keys:
77 items = sorted(obj.items())
78 else:
cb96c1b2 79 items = six.iteritems(obj)
cba64103
BP
80 for i, (key, value) in enumerate(items):
81 if i > 0:
82 self.stream.write(u",")
83 self.__indent_line()
25f599fb 84 self.__serialize_string(six.text_type(key))
cba64103
BP
85 self.stream.write(u":")
86 if self.pretty:
87 self.stream.write(u' ')
88 self.serialize(value)
89
90 self.stream.write(u"}")
91 self.depth -= 1
8f808842 92 elif isinstance(obj, (list, tuple)):
cba64103
BP
93 self.stream.write(u"[")
94 self.depth += 1
95
96 if obj:
97 self.__indent_line()
98
99 for i, value in enumerate(obj):
100 if i > 0:
101 self.stream.write(u",")
102 self.__indent_line()
103 self.serialize(value)
104
105 self.depth -= 1
106 self.stream.write(u"]")
99155935 107 else:
cba64103
BP
108 raise Exception("can't serialize %s as JSON" % obj)
109
110
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Writes 'obj' to 'stream' as JSON.

    'pretty' and 'sort_keys' are passed through to the serializer unchanged.
    """
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
99155935 113
26bb0f31 114
99155935
BP
def to_file(obj, name, pretty=False, sort_keys=True):
    """Writes 'obj' as JSON to the file named 'name', replacing any existing
    contents.  The file is closed even if serialization fails."""
    with open(name, "w") as stream:
        to_stream(obj, stream, pretty, sort_keys)
121
26bb0f31 122
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string."""
    buf = six.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
129
26bb0f31 130
99155935
BP
def from_stream(stream):
    """Parses the JSON text read from 'stream' in 4096-character chunks.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk.  Returns whatever Parser.finish() returns: the
    parsed value on success, or an error message string on failure.
    """
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Test for emptiness rather than equality with "": a stream whose
        # read() returns b"" at end of input would never compare equal to ""
        # on Python 3, and the loop would not terminate.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()
138
26bb0f31 139
99155935
BP
def from_file(name):
    """Parses the JSON text in the file named 'name' and returns the result
    of from_stream() on it.  The file is closed before returning."""
    with open(name, "r") as stream:
        return from_stream(stream)
146
26bb0f31 147
def from_string(s):
    """Parses the JSON text in 's'.

    's' may be a text string or a UTF-8 encoded byte string.  Returns the
    parsed value on success.  On failure, returns an error message string
    instead, either for invalid UTF-8 in a byte string or for a parse error.
    """
    if not isinstance(s, six.text_type):
        # Byte strings (str on Python 2, bytes on Python 3) must be decoded
        # before they can be fed to the parser.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Iterating a bytearray yields integers on both Python 2 and
            # Python 3, whereas iterating the raw slice yields 1-character
            # strings on Python 2 but integers on Python 3 (where ord()
            # would raise TypeError).
            bad_bytes = bytearray(e.object[e.start:e.end])
            seq = ' '.join(["0x%2x" % b for b in bad_bytes if b >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
162
26bb0f31 163
class Parser(object):
    """Incremental JSON parser.

    Pass input text to feed(), in as many pieces as convenient, then call
    finish() to obtain either the parsed value or an error message string.
    The top-level value must be an object or an array.

    If 'check_trailer' is false, the parser is done as soon as the top-level
    value is closed.  If it is true, the parser keeps accepting input after
    that point and reports any further token as an error.
    """

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer.
    #
    # Each lexer state is a function taking (self, c) that returns True if
    # it consumed character 'c', or False if 'c' must be presented again to
    # the (new) current state.

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table for the first character of a token.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.  Raw strings
    # keep the backslashes from being read as (invalid) string escapes.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number text in the buffer to a token.

        A number whose value is integral and lies in the range
        [-2**63, 2**63) becomes an int, even if it was written with a
        fraction or exponent.  Anything else becomes a float.  Malformed
        numbers, and floats that overflow to infinity, are reported as
        errors.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            # sys.maxsize is used because sys.maxint does not exist on
            # Python 3.
            if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            # The value is significand * 10**pow10.
            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to normalize to pow10 == 0 without losing digits.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Floor division keeps the significand an int; "/" would
                    # produce a float on Python 3.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"e[-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the 4 hex digits in 's' as an integer.  If
        's' is too short, malformed, or "0000", reports an error and returns
        None."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes (the character following a backslash) and
    # the text each one stands for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the buffered string body and passes the
        result to the parser as a 'string' token.  Reports an error instead
        if any escape is invalid."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            # six.unichr is unichr() on Python 2 and chr() on Python 3,
            # where the unichr builtin no longer exists.
            out += six.unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser.
    #
    # Each parser state is a function taking (self, token, string).  'token'
    # is a punctuation character, the word 'string' (with the text in
    # 'string'), or a literal value: False, True, None, or a number.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and hands 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack, under
        'member_name' if that container is an object."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        if len(self.stack) == 1:
            # The top-level value is complete; it stays on the stack for
            # finish() to return.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.

        A scalar is stored in the enclosing container and the parser moves
        to 'next_state'.  An opening bracket or brace starts a nested
        container, in which case the push sets the parser state itself.
        """
        if token == 'string':
            self.__put_value(string)
        elif token == '{':
            self.__push_object()
            return
        elif token == '[':
            self.__push_array()
            return
        elif (token in [False, None, True] or
              isinstance(token, tuple(six.integer_types) + (float,))):
            self.__put_value(token)
        else:
            self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, and
        stops parsing.  Only the first error is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser until it is done or 's'
        is exhausted.  Returns the number of characters consumed."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A lexer state that does not consume 'c' has changed state;
            # the same character is presented again on the next iteration.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input."""
        return self.done

    def finish(self):
        """Signals end of input.  Returns the parsed value if parsing
        succeeded, otherwise an error message string."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending keyword or number by feeding a delimiter.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error