]> git.proxmox.com Git - ovs.git/blob - python/ovs/json.py
Test the Python C JSON extension
[ovs.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 from __future__ import absolute_import
16
17 import functools
18 import json
19 import re
20 import sys
21
22 import six
23
# Identifiers for the two parser implementations that may be selected.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'

# Select the C extension when it can be imported; fall back to the Python
# implementation otherwise.
try:
    import ovs._json
except ImportError:
    PARSER = PARSER_PY
else:
    PARSER = PARSER_C

__pychecker__ = 'no-stringiter'

# Indentation width used when pretty-printing.
SPACES_PER_LEVEL = 2

# json.dumps preconfigured with separators that contain no whitespace.
_dumper = functools.partial(json.dumps, separators=(",", ":"))

if not six.PY2:
    dumper = _dumper
else:
    def dumper(*args, **kwargs):
        """Serialize with _dumper and decode the result using the
        'raw-unicode-escape' codec (Python 2 only)."""
        return _dumper(*args, **kwargs).decode('raw-unicode-escape')
42
43
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the resulting text to 'stream'.

    If 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces per
    nesting level; otherwise no indentation is requested.  'sort_keys' is
    passed through to the serializer."""
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    stream.write(text)
47
48
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name', which is opened
    for writing in text mode.  'pretty' and 'sort_keys' have the same meaning
    as for to_stream()."""
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
52
53
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    If 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces per
    nesting level; otherwise no indentation is requested.  'sort_keys' is
    passed through to the serializer."""
    if pretty:
        indent = SPACES_PER_LEVEL
    else:
        indent = None
    return dumper(obj, indent=indent, sort_keys=sort_keys)
57
58
def from_stream(stream):
    """Parses JSON read from 'stream' in 4096-unit chunks and returns the
    result of Parser.finish().

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk (which it does once it is done or has recorded an
    error)."""
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Use truthiness rather than comparing with "": a stream that yields
        # b"" (or None) at end of input would otherwise never terminate the
        # loop, because feeding an empty buffer consumes 0 == len(buf) items.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()
66
67
def from_file(name):
    """Parses the JSON text in the file named 'name' and returns the result
    of from_stream().  The file is opened for reading in text mode and is
    closed again even if parsing raises an exception."""
    # A context manager replaces the manual try/finally close, matching the
    # style already used by to_file().
    with open(name, "r") as stream:
        return from_stream(stream)
74
75
def from_string(s):
    """Parses the JSON text in 's' and returns the result of Parser.finish().

    If 's' is not a text (unicode) string it is first decoded as UTF-8.  When
    that decoding fails, a string describing the invalid byte sequence is
    returned instead of a parse result."""
    if not isinstance(s, six.text_type):
        # 's' is a byte string (a plain str on Python 2, bytes on Python 3),
        # so it has to be decoded before it can be fed to the parser.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Iterate over a bytearray so that each item is an integer on
            # both Python 2 and Python 3.  Iterating the bytes slice directly
            # yields ints on Python 3, where calling ord() on them raises
            # TypeError and hides the real error.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join(["0x%2x" % b for b in bad if b >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
90
91
class Parser(object):
    """Incremental JSON parser.

    Text is supplied through feed(), possibly in several pieces, and finish()
    then returns either the parsed value or a string describing the first
    error that was encountered.

    When the C extension was imported successfully (PARSER == PARSER_C),
    constructing a Parser returns an ovs._json.Parser object instead of an
    instance of this class."""

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        if PARSER == PARSER_C:
            return ovs._json.Parser(*args, **kwargs)
        return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Initializes the parser.

        If 'check_trailer' is false, the parser marks itself done as soon as
        the top-level value is closed.  If it is true, the parser keeps
        accepting input afterwards, and any further token is reported as
        trailing garbage."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer.
    #
    # Every lexer state is a function taking (self, c) that returns True if
    # it consumed character 'c' and False if 'c' has to be presented again
    # to the (by then changed) lexer state.

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Maps the first character of a token to the function that handles it.
    # Characters without an entry are handled by __lex_start_error.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword once it has been started.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            # 'c' terminates the keyword and is not part of it, so it is
            # left for the next lexer state.
            self.__lex_finish_keyword()
            return False

    # Raw strings keep the backslash in "\." from being treated as an
    # (invalid) string escape sequence.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number text in self.buffer into an int or a float and
        passes it to the parser, or records an error if it is malformed."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                    (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction that consists only of zeros carries no information.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to fold the power of ten into the significand so that
                # the value can be reported as an exact integer.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                        ((not sign and significand < 2 ** 63) or
                         (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            # Not exactly representable as an integer in range: use a float.
            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"e[-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            # 'c' terminates the number and is not part of it.
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile(r"[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the integer value of the four hex digits in 's'.  Records
        an error and returns None if 's' is not a usable \\u escape."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the characters they stand for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Resolves the escape sequences in self.buffer and passes the
        resulting string to the parser, or records an error if an escape
        sequence is malformed."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by a
                # second \u escape that holds the trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += six.unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        # The character after a backslash is buffered unconditionally; the
        # escape itself is interpreted later by __lex_finish_string.
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser.
    #
    # Every parser state is a function taking (self, token, string).  'token'
    # is a punctuation character, a keyword value (False, True or None), a
    # number, or the literal 'string', in which case 'string' holds the text.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and hands 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the container on top of the stack: under
        self.member_name for an object, or appended for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        if len(self.stack) < Parser.MAX_HEIGHT:
            # Link the new container into its parent before descending.
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        if len(self.stack) == 1:
            # The top-level value is complete.  It stays on the stack so
            # that finish() can return it.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        number_types = tuple(six.integer_types) + (float,)
        if token in [False, None, True] or isinstance(token, number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            # Pushing a container already selected the next parse state (and
            # an error ends parsing), so 'next_state' must not be applied.
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and marks the parser as done.  Only the first error
        is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser and returns the number
        of characters consumed.  Consumption stops early once the parser is
        done, which includes having recorded an error."""
        i = 0
        while not self.done and i < len(s):
            c = s[i]
            # A lexer state that returns False did not consume 'c'; the same
            # character is presented again to the new lexer state.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1
                i += 1
        return i

    def is_done(self):
        """Returns true once parsing has finished or an error occurred."""
        return self.done

    def finish(self):
        """Signals the end of input.  Returns the parsed value on success or
        a string that describes the error on failure."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending keyword or number by feeding a terminator.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error
529 return self.stack.pop()
530 else:
531 return self.error