]> git.proxmox.com Git - mirror_ovs.git/blame - python/ovs/json.py
Remove dependency on python3-six
[mirror_ovs.git] / python / ovs / json.py
CommitLineData
e0edde6f 1# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
99155935
BP
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at:
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
622749d8 15from __future__ import absolute_import
6c7050b5 16
622749d8
TW
17import functools
18import json
99155935 19import re
99155935
BP
20import sys
21
75ff7116
TW
# Identifiers for the two JSON parser implementations.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'

# Use the C extension parser when it can be imported; otherwise use the
# pure-Python implementation.
try:
    import ovs._json
except ImportError:
    PARSER = PARSER_PY
else:
    PARSER = PARSER_C

__pychecker__ = 'no-stringiter'

# Number of spaces per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2

# json.dumps pre-bound to "," and ":" separators with no padding.
dumper = functools.partial(json.dumps, separators=(",", ":"))
cba64103
BP
34
35
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serialize 'obj' as JSON and write the text to 'stream'.

    When 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces
    per nesting level.  'sort_keys' is passed through to the serializer.
    """
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    stream.write(text)
99155935 39
26bb0f31 40
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serialize 'obj' as JSON into the file named 'name'.

    The file is opened in text write mode, so existing content is replaced.
    'pretty' and 'sort_keys' are forwarded to to_stream().
    """
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
99155935 44
26bb0f31 45
def to_string(obj, pretty=False, sort_keys=True):
    """Return 'obj' serialized as a JSON string.

    When 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces
    per nesting level.  'sort_keys' is passed through to the serializer.
    """
    indent = SPACES_PER_LEVEL if pretty else None
    return dumper(obj, indent=indent, sort_keys=sort_keys)
99155935 49
26bb0f31 50
99155935
BP
def from_stream(stream):
    """Parse JSON read from 'stream' in 4096-character chunks.

    Returns whatever the parser's finish() returns: the parsed value on
    success, or an error message string on failure.
    """
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Stop at end of input or as soon as the parser declines to consume
        # a whole chunk.  The truthiness test (rather than 'buf == ""') also
        # recognizes an empty bytes object, which would otherwise loop
        # forever because feed() of an empty buffer returns 0 == len(buf).
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()
58
26bb0f31 59
99155935
BP
def from_file(name):
    """Parse the JSON text stored in the file named 'name'.

    The file is opened in text read mode and is closed again whether or not
    parsing succeeds.  Returns the result of from_stream().
    """
    with open(name, "r") as stream:
        return from_stream(stream)
66
26bb0f31 67
def from_string(s):
    """Parse the JSON text in 's'.

    's' is normally a str.  Any other object (in practice, bytes) is first
    decoded as UTF-8; if that fails, an error message string naming the
    offending bytes is returned instead of a parsed value.

    Returns the parsed value on success, or an error message string.
    """
    if not isinstance(s, str):
        # Not text yet: decode it before handing it to the parser.
        try:
            s = str(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Iterating bytes yields ints on Python 3, so format the byte
            # values directly instead of calling ord() on them.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join("0x%02x" % b for b in bad if b >= 0x80)
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
82
26bb0f31 83
class Parser(object):
    """Incremental JSON parser.

    Text is supplied piecemeal through feed().  finish() then returns either
    the parsed top-level value (a dict or a list) or, on failure, a string
    describing the first error encountered.

    The lexer and the parser are each a small state machine: 'lex_state'
    and 'parse_state' hold the function that handles the next character or
    the next token, respectively.
    """

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        # Hand construction over to the C extension parser when it is the
        # selected implementation; otherwise build this Python class.
        if PARSER == PARSER_C:
            return ovs._json.Parser(*args, **kwargs)
        return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Create a parser.

        If 'check_trailer' is false, the parser reports itself done as soon
        as the top-level value is closed.  If it is true, parsing continues
        so that any further token is reported as trailing garbage.
        """
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # ------------------------------------------------------------------
    # Lexer: start state.
    # ------------------------------------------------------------------

    def __lex_start_space(self, c):
        """Whitespace between tokens is ignored."""
        pass

    def __lex_start_alpha(self, c):
        """Begin accumulating a keyword (false, true, null)."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Punctuation is a complete token on its own."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begin accumulating a number."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """An opening quote starts a string; the quote itself is dropped."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Report a character that cannot start any token."""
        code = ord(c)
        if 32 <= code < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % code)

    # Dispatch table from a token's first character to its handler.  Built
    # with dict.fromkeys() so that no loop variable is left behind as a
    # class attribute.
    __lex_start_actions = {}
    __lex_start_actions.update(dict.fromkeys(" \t\n\r", __lex_start_space))
    __lex_start_actions.update(
        dict.fromkeys("abcdefghijklmnopqrstuvwxyz", __lex_start_alpha))
    __lex_start_actions.update(dict.fromkeys("[{]}:,", __lex_start_token))
    __lex_start_actions.update(
        dict.fromkeys("-0123456789", __lex_start_number))
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Dispatch on the first character of a token; always consumes it."""
        action = Parser.__lex_start_actions.get(c, Parser.__lex_start_error)
        action(self, c)
        return True

    # ------------------------------------------------------------------
    # Lexer: keywords.
    # ------------------------------------------------------------------

    # Characters that may continue a keyword.
    __lex_alpha = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

    # Recognized keywords and the values they denote.
    __keywords = {"false": False, "true": True, "null": None}

    def __lex_finish_keyword(self):
        """Emit the value for the buffered keyword, or report an error."""
        try:
            value = Parser.__keywords[self.buffer]
        except KeyError:
            self.__error("invalid keyword '%s'" % self.buffer)
        else:
            self.__parser_input(value)

    def __lex_keyword(self, c):
        """Accumulate keyword letters; any other character ends the keyword
        and is left unconsumed so that it is lexed again from the start
        state."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        self.__lex_finish_keyword()
        return False

    # ------------------------------------------------------------------
    # Lexer: numbers.
    # ------------------------------------------------------------------

    __number_re = re.compile("(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Convert the buffered text to a number and pass it to the parser.

        Values that are exactly representable as an integer in the range
        [-2**63, 2**63 - 1] are emitted as int, everything else as float.
        Malformed numbers produce a specific error message where possible.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                    (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros carries no information.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Fold the power of ten into the significand as far as that
                # can be done exactly, to see whether the value is an
                # integer that fits in 64 bits.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                        ((not sign and significand < 2 ** 63) or
                         (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # Both 'e' and 'E' introduce an exponent, so diagnose both.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Accumulate number characters; any other character ends the
        number and is left unconsumed."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        self.__lex_finish_number()
        return False

    # ------------------------------------------------------------------
    # Lexer: strings.
    # ------------------------------------------------------------------

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Return the integer value of the four hex digits in 's'.

        Reports an error and returns None if 's' is too short, is not
        hexadecimal, or encodes U+0000."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return 0xd800 <= c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return 0xdc00 <= c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will
        not make any sense if 'leading' or 'trailing' are not in the correct
        ranges for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the characters they stand for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Expand the escapes in the buffered string body and pass the
        resulting 'string' token to the parser."""
        inp = self.buffer
        end = len(inp)
        pos = 0
        # Pieces are collected and joined once, and the input is walked by
        # index, so that a string with many escapes costs linear time.
        parts = []
        while pos < end:
            backslash = inp.find('\\', pos)
            if backslash == -1:
                parts.append(inp[pos:])
                break
            parts.append(inp[pos:backslash])
            pos = backslash + 1
            if pos >= end:
                self.__error("quoted string may not end with backslash")
                return

            esc = inp[pos]
            replacement = Parser.__unescape.get(esc)
            if replacement is not None:
                parts.append(replacement)
                pos += 1
                continue
            elif esc != u'u':
                self.__error("bad escape \\%s" % esc)
                return

            # "\uXXXX": 'pos' is at the 'u', followed by four hex digits.
            c0 = self.__lex_4hex(inp[pos + 1:pos + 5])
            if c0 is None:
                return
            pos += 5

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[pos:pos + 2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[pos + 2:pos + 6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                pos += 6
            else:
                code_point = c0
            parts.append(chr(code_point))
        self.__parser_input('string', u"".join(parts))

    def __lex_string_escape(self, c):
        """Buffer the character after a backslash verbatim (so an escaped
        quote does not end the string) and resume normal string lexing."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Handle one character inside a quoted string."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Run the current lexer state on 'c'.  Returns True if 'c' was
        consumed, False if it must be presented again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # ------------------------------------------------------------------
    # Parser states.  Each takes the token and, for 'string' tokens, the
    # string's value.
    # ------------------------------------------------------------------

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Deliver a complete token to the parser and reset the lexer."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Store 'value' in the container on top of the stack."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Open the new container 'new_json', nesting it inside the current
        one if there is one, and continue in 'next_state'."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Close the container on top of the stack."""
        if len(self.stack) == 1:
            # The top-level value is complete; it stays on the stack for
            # finish() to return.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handle a token in value position.

        Scalars are stored and the parser moves to 'next_state'.  Opening a
        container leaves the state chosen by the push in place."""
        # Keyword tokens are False, True (both bool, hence int) and None;
        # number tokens are int or float.
        if token is None or isinstance(token, (int, float)):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Record 'message' (first error wins) and stop parsing."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    # ------------------------------------------------------------------
    # Public interface.
    # ------------------------------------------------------------------

    def feed(self, s):
        """Parse as much of string 's' as possible.

        Returns the number of characters consumed, which is less than
        len(s) if parsing finished or failed part-way through."""
        i = 0
        n = len(s)
        while not self.done and i < n:
            c = s[i]
            # Advance only when the lexer consumed the character; a
            # character that ended a number or keyword is fed again.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1
        return i

    def is_done(self):
        """Returns true if parsing has completed or failed."""
        return self.done

    def finish(self):
        """Signal end of input and return the result.

        Returns the parsed top-level value on success, or an error message
        string on failure."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending number or keyword by feeding a delimiter.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error