]> git.proxmox.com Git - ovs.git/blob - python/ovs/json.py
Remove dependency on python3-six
[ovs.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 from __future__ import absolute_import
16
17 import functools
18 import json
19 import re
20 import sys
21
# Identifiers for the two available parser implementations.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'

# Select the C extension parser when it can be imported; otherwise use the
# pure-Python Parser class defined in this module.
try:
    import ovs._json
except ImportError:
    PARSER = PARSER_PY
else:
    PARSER = PARSER_C

__pychecker__ = 'no-stringiter'

# Indentation width used when pretty-printing.
SPACES_PER_LEVEL = 2

# json.dumps() preconfigured with compact separators (no spaces after ","
# or ":").
dumper = functools.partial(json.dumps, separators=(",", ":"))
34
35
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serialize 'obj' as JSON and write the text to 'stream'.

    'stream' must provide a write() method accepting a string.  When
    'pretty' is true the output is indented by SPACES_PER_LEVEL spaces per
    nesting level; otherwise no indentation is requested.  'sort_keys' is
    passed through to the serializer.
    """
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    stream.write(text)
39
40
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serialize 'obj' as JSON into the file called 'name'.

    The file is opened in text write mode ("w") and closed again before
    returning.  'pretty' and 'sort_keys' are forwarded to to_stream().
    """
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
44
45
def to_string(obj, pretty=False, sort_keys=True):
    """Return 'obj' serialized as a JSON string.

    When 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces
    per nesting level; otherwise no indentation is requested.  'sort_keys'
    is passed through to the serializer.
    """
    if pretty:
        indent = SPACES_PER_LEVEL
    else:
        indent = None
    return dumper(obj, indent=indent, sort_keys=sort_keys)
49
50
def from_stream(stream):
    """Parse JSON text read from 'stream' and return the parser's result.

    The stream is read in pieces of up to 4096 characters.  Reading stops
    when read() returns an empty string or when the parser consumes less
    than a whole piece.  The return value is whatever Parser.finish()
    returns.
    """
    parser = Parser(check_trailer=True)
    # iter() with a sentinel keeps calling read() until it returns "".
    for chunk in iter(lambda: stream.read(4096), ""):
        consumed = parser.feed(chunk)
        if consumed != len(chunk):
            break
    return parser.finish()
58
59
def from_file(name):
    """Parse the JSON text stored in the file called 'name'.

    The file is opened in text read mode ("r") and is closed before
    returning, whether or not parsing raised.  Returns whatever
    from_stream() returns for the file's contents.
    """
    with open(name, "r") as source:
        return from_stream(source)
66
67
def from_string(s):
    """Parse the JSON text 's' and return the parser's result.

    's' is normally a str.  Any other object is first decoded as UTF-8 with
    str(s, 'utf-8'); if that decoding fails, a string describing the
    offending byte sequence is returned instead of a parsed value.

    Otherwise the whole text is fed to a Parser created with
    check_trailer=True and the value of its finish() method is returned.
    """
    if not isinstance(s, str):
        # Not text yet (for example bytes): decode it before parsing.
        try:
            s = str(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Slicing bytes and iterating yields ints, not 1-character
            # strings, so the values are formatted directly; calling ord()
            # on them would raise TypeError.  bytearray() normalizes any
            # bytes-like slice to a sequence of ints.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join("0x%2x" % b for b in bad if b >= 0x80)
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
82
83
class Parser(object):
    """Incremental JSON parser.

    Input text is supplied piece by piece through feed().  finish() then
    returns either the parsed top-level value (a dict or a list; any other
    top-level value is a syntax error) or a string describing the first
    error that was encountered.

    Internally the class is two small state machines: a lexer
    ('lex_state') that turns characters into tokens, and a parser
    ('parse_state') that consumes tokens and builds the result on 'stack'.
    Both states hold plain functions taken from the class, which are called
    with the instance passed explicitly.

    Positions in error messages (line, column, byte) are counted from zero.
    """

    # Maximum height of parsing stack. #
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        """Create a parser, delegating to ovs._json.Parser when the module
        level PARSER setting selects the C implementation."""
        if PARSER == PARSER_C:
            return ovs._json.Parser(*args, **kwargs)
        return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Initialize an empty parser.

        If 'check_trailer' is false, the parser marks itself done as soon
        as the top-level value is closed.  If it is true, parsing continues
        so that any further token is reported as trailing garbage.
        """
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    def __lex_start_space(self, c):
        """Skip whitespace between tokens."""
        pass

    def __lex_start_alpha(self, c):
        """Begin accumulating a keyword (true, false, null)."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Pass a single-character punctuation token to the parser."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begin accumulating a number."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """Begin a quoted string; the opening quote itself is dropped."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Report a character that cannot start any token."""
        if 32 <= ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table: first character of a token -> handler.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens: dispatch on 'c'.  Always consumes
        the character."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Convert the accumulated keyword into its token, or report it as
        invalid."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Returns False (character not
        consumed) when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile("(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Validate the accumulated number text and pass its value to the
        parser.

        A value that is integral and fits the signed 64-bit range
        (-2**63 .. 2**63 - 1) is delivered as an int; anything else is
        delivered as a float.  Malformed text produces a specific error.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                    (int(exp) > sys.maxsize
                     or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros does not affect the value.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Represent the number as significand * 10**pow10.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to fold the power of ten into the significand so the
                # value can be delivered as an exact integer.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                        ((not sign and significand < 2 ** 63) or
                         (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # Both 'e' and 'E' introduce an exponent (the lexer and
            # __number_re accept either), so diagnose both spellings.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Any character that can occur in a
        number is accumulated; validation happens when the number ends.
        Returns False (character not consumed) when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Return the integer value of the four hex digits in 's'.

        On failure an error is recorded and None is returned.  U+0000 is
        rejected.
        """
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will
        not make any sense if 'leading' or 'trailing' are not in the correct
        ranges for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes: character after the backslash -> result.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decode the escapes in the accumulated string body and pass the
        resulting 'string' token to the parser."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                # No more escapes: the remainder is literal text.
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            # \uXXXX escape.
            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += chr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state right after a backslash inside a string: keep the
        escaped character verbatim so that an escaped quote does not end
        the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Run the current lexer state on 'c'.  Returns True if the
        character was consumed, False if it must be presented again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    def __parse_start(self, token, unused_string):
        """Parser state before any token: only '{' or '[' may start the
        input."""
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        """Parser state after the top-level value: any token is an error."""
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        """Parser state just after '{': an empty object or a first name."""
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        """Parser state expecting a member name."""
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        """Parser state expecting the ':' after a member name."""
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        """Parser state expecting a member value."""
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        """Parser state after a member value: ',' or '}'."""
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        """Parser state just after '[': an empty array or a first value."""
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        """Parser state expecting an array element."""
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        """Parser state after an array element: ',' or ']'."""
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Deliver a completed token to the parser and reset the lexer.

        'token' is a punctuation character, the literal 'string' (with the
        decoded text in 'string'), or the Python value of a keyword or
        number.
        """
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Store 'value' in the container on top of the stack: under the
        pending member name for an object, appended for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Open a new container 'new_json', attaching it to its parent (if
        any), unless that would exceed MAX_HEIGHT."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        """Open a new object."""
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        """Open a new array."""
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Close the container on top of the stack.

        The top-level container stays on the stack so that finish() can
        return it; nested containers are popped and parsing resumes in the
        parent.
        """
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handle a token in a position where a value is expected, then
        move to 'next_state' (for scalar values)."""
        # Keyword tokens arrive as True/False/None and numbers as int or
        # float; bool is a subclass of int, so one isinstance() check covers
        # every scalar except None.
        if token is None or isinstance(token, (int, float)):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            # Pushing a container (or failing) sets the state itself.
            return
        self.parse_state = next_state

    def __error(self, message):
        """Record 'message' with the current position and stop parsing.
        Only the first error is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feed the text 's' to the parser.

        Returns the number of characters of 's' that were consumed, which
        is less than len(s) if the parser finished (successfully or with an
        error) before reaching the end.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A state that does not consume 'c' has finished a token; the
            # same character is then presented to the new state.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Return True once the parser has stopped accepting input."""
        return self.done

    def finish(self):
        """Signal end of input and return the result.

        Returns the parsed top-level value on success, or a string of the
        form "line L, column C, byte B: message" on failure.
        """
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending keyword or number by feeding a terminator.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error
523 return self.error