]> git.proxmox.com Git - ovs.git/blame - python/ovs/json.py
compat: Fixups for newer kernels
[ovs.git] / python / ovs / json.py
CommitLineData
e0edde6f 1# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
99155935
BP
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at:
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
622749d8 15from __future__ import absolute_import
6c7050b5 16
622749d8
TW
17import functools
18import json
99155935 19import re
99155935
BP
20import sys
21
cb96c1b2 22import six
b3ac2947 23
c63b04d6
TW
24try:
25 import ovs._json
26except ImportError:
27 pass
28
26bb0f31
EJ
# Option string read by the pychecker lint tool.
# NOTE(review): presumably silences its string-iteration warning -- confirm.
__pychecker__ = 'no-stringiter'

# Number of spaces of indentation per nesting level in "pretty" output.
SPACES_PER_LEVEL = 2

# json.dumps() preconfigured with compact separators: no whitespace after
# "," or ":".
_dumper = functools.partial(json.dumps, separators=(",", ":"))

if six.PY2:
    def dumper(*args, **kwargs):
        # On Python 2 the serialized result is decoded with the
        # 'raw-unicode-escape' codec before being returned.
        # NOTE(review): presumably so that callers always get unicode text
        # -- confirm.
        return _dumper(*args, **kwargs).decode('raw-unicode-escape')
else:
    # On other Python versions the result of json.dumps() is used as-is.
    dumper = _dumper
cba64103
BP
39
40
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream'.

    'stream' only needs a write() method.  If 'pretty' is true the output is
    indented by SPACES_PER_LEVEL spaces per nesting level, otherwise no
    indentation is requested.  'sort_keys' is passed through to the JSON
    serializer."""
    text = to_string(obj, pretty=pretty, sort_keys=sort_keys)
    stream.write(text)
99155935 44
26bb0f31 45
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name'.

    The file is opened in text mode for writing (replacing any existing
    content) and closed again before returning.  'pretty' and 'sort_keys'
    have the same meaning as for to_stream()."""
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
99155935 49
26bb0f31 50
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    If 'pretty' is true the output is indented by SPACES_PER_LEVEL spaces per
    nesting level, otherwise no indentation is requested.  'sort_keys' is
    passed through to the JSON serializer."""
    if pretty:
        indent = SPACES_PER_LEVEL
    else:
        indent = None
    return dumper(obj, indent=indent, sort_keys=sort_keys)
99155935 54
26bb0f31 55
99155935
BP
def from_stream(stream):
    """Parses JSON read from 'stream' and returns the result.

    'stream' is read in chunks of up to 4096 via its read() method until it
    is exhausted or the parser stops consuming input.  Returns what
    Parser.finish() returns: the parsed value on success, otherwise a string
    describing the error.  Trailing non-whitespace after the top-level value
    is treated as an error."""
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # An empty read means end of input.  Testing truthiness (rather than
        # comparing with "") also recognizes the b"" that a binary stream
        # returns at EOF, which would otherwise never terminate the loop.
        # A short feed() means the parser finished or reported an error.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()
63
26bb0f31 64
99155935
BP
def from_file(name):
    """Parses the JSON in the file named 'name' and returns the result of
    from_stream() on its content.

    The file is opened in text mode for reading and is closed again before
    returning, even if parsing raises an exception."""
    with open(name, "r") as stream:
        return from_stream(stream)
71
26bb0f31 72
def from_string(s):
    """Parses the JSON text in 's' and returns the result.

    's' may be a text string, or a byte string holding UTF-8, which is
    decoded first.  Returns what Parser.finish() returns: the parsed value on
    success, otherwise a string describing the error.  A byte string that is
    not valid UTF-8 also yields an error string rather than an exception."""
    if not isinstance(s, six.text_type):
        # Not text yet (a Python 2 'str' or a Python 3 'bytes'), so decode
        # it before handing it to the parser.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # bytearray() iterates as integers on both Python 2 and 3.
            # Calling ord() on the elements of a Python 3 'bytes' slice
            # would raise TypeError because they are already integers.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join(["0x%2x" % b for b in bad if b >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
87
26bb0f31 88
class Parser(object):
    """Incremental parser for JSON text.

    Input is supplied in pieces through feed(); finish() then returns either
    the parsed top-level value (a dict or a list; nothing else is accepted at
    the top level) or a string describing the first error encountered.

    If 'check_trailer' is false, parsing is considered done as soon as the
    top-level value is closed.  If it is true, any further non-whitespace
    input is reported as an error.

    The parser is a pair of state machines: a lexer ('lex_state') that turns
    characters into tokens, and a parser ('parse_state') that consumes those
    tokens.  Both states are stored as functions taken from the class and
    are invoked with the instance passed explicitly."""

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        """Returns a parser from the 'ovs._json' extension module if it could
        be imported, otherwise an instance of this pure-Python class."""
        try:
            return ovs._json.Parser(*args, **kwargs)
        except NameError:
            # 'ovs' is not bound because the optional import failed.
            return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer: handlers for the first character of a token.

    def __lex_start_space(self, c):
        """Skips whitespace between tokens."""
        pass

    def __lex_start_alpha(self, c):
        """Begins a keyword (true, false or null)."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Passes a single-character punctuation token to the parser."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begins a number."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """Begins a quoted string; the opening quote is not buffered."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Reports a character that cannot begin any token."""
        if 32 <= ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Maps the first character of a token to its handler.  Built with
    # dict.fromkeys() so that no loop variable is left behind as a class
    # attribute.
    __lex_start_actions = {}
    __lex_start_actions.update(dict.fromkeys(" \t\n\r", __lex_start_space))
    __lex_start_actions.update(
        dict.fromkeys("abcdefghijklmnopqrstuvwxyz", __lex_start_alpha))
    __lex_start_actions.update(dict.fromkeys("[{]}:,", __lex_start_token))
    __lex_start_actions.update(dict.fromkeys("-0123456789",
                                             __lex_start_number))
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Dispatches on the first character of a token.  Always consumes
        'c'."""
        action = Parser.__lex_start_actions.get(c, Parser.__lex_start_error)
        action(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

    def __lex_finish_keyword(self):
        """Converts the buffered keyword into a token, or reports an error
        if it is not one of false, true and null."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Accumulates keyword characters.  Returns False, leaving 'c'
        unconsumed, when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        self.__lex_finish_keyword()
        return False

    # Groups: sign, integer part, fraction digits, exponent.  Raw strings
    # keep "\." from being an invalid string escape sequence.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    # Diagnostics for malformed numbers, tried in order; the first test that
    # matches the buffered text selects the message.
    __number_errors = (
        (re.compile(r"-?0[0-9]").match,
         "leading zeros not allowed"),
        (re.compile(r"-([^0-9]|$)").match,
         "'-' must be followed by digit"),
        (re.compile(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)").match,
         "decimal point must be followed by digit"),
        # Accept "E" as well as "e", as the lexer and __number_re do.
        (re.compile(r"[eE][-+]?([^0-9]|$)").search,
         "exponent must contain at least one digit"),
    )

    def __lex_finish_number(self):
        """Converts the buffered number into an int or float token, or
        reports an error if it is malformed or out of range.

        Numbers whose value is an integer in the range [-2**63, 2**63 - 1]
        become ints (even if written with a fraction or an exponent); all
        other numbers become floats."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if not m:
            for matches, message in Parser.__number_errors:
                if matches(s):
                    self.__error(message)
                    return
            self.__error("syntax error in number")
            return

        sign, integer, fraction, exp = m.groups()
        if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
            self.__error("exponent outside valid range")
            return

        # A fraction consisting only of zeros does not affect the value.
        if fraction is not None and len(fraction.lstrip('0')) == 0:
            fraction = None

        # Represent the number as significand * 10**pow10.
        sig_string = integer
        if fraction is not None:
            sig_string += fraction
        significand = int(sig_string)

        pow10 = 0
        if fraction is not None:
            pow10 -= len(fraction)
        if exp is not None:
            pow10 += int(exp)

        if significand == 0:
            self.__parser_input(0)
            return
        elif significand <= 2 ** 63:
            # Try to normalize to pow10 == 0, giving up once the significand
            # can no longer fit in the integer range.
            while pow10 > 0 and significand <= 2 ** 63:
                significand *= 10
                pow10 -= 1
            while pow10 < 0 and significand % 10 == 0:
                significand //= 10
                pow10 += 1
            if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                if sign:
                    self.__parser_input(-significand)
                else:
                    self.__parser_input(significand)
                return

        value = float(s)
        if value == float("inf") or value == float("-inf"):
            self.__error("number outside valid range")
            return
        if value == 0:
            # Suppress negative zero.
            value = 0
        self.__parser_input(value)

    def __lex_number(self, c):
        """Accumulates number characters.  Returns False, leaving 'c'
        unconsumed, when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        self.__lex_finish_number()
        return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the four hex digits in 's' (the text after
        "\\u" in an escape).  Reports an error and returns None if 's' is too
        short, is not hexadecimal, or denotes U+0000."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return 0xd800 <= c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return 0xdc00 <= c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will
        not make any sense if 'leading' or 'trailing' are not in the correct
        ranges for leading or trailing surrogates."""
        # Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:        110111xxxxxxxxxx
        # Code point:        000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes: character after the backslash -> replacement.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the buffered string body and passes the
        result to the parser as a 'string' token, or reports an error if an
        escape is malformed."""
        inp = self.buffer
        pieces = []
        while inp:
            backslash = inp.find('\\')
            if backslash == -1:
                pieces.append(inp)
                break
            pieces.append(inp[:backslash])
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                pieces.append(replacement)
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            pieces.append(six.unichr(code_point))
        self.__parser_input('string', u"".join(pieces))

    def __lex_string_escape(self, c):
        """Buffers the character following a backslash, so that an escaped
        quote does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Accumulates the body of a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Feeds 'c' to the current lexer state.  Returns True if 'c' was
        consumed, False if it must be presented again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes the token and, for 'string' tokens, the
    # string's content.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and hands 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the innermost open container: under
        'member_name' for an object, appended for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Opens the container 'new_json' inside the current one (if any)
        and moves to 'next_state', unless that would exceed MAX_HEIGHT."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Closes the innermost open container."""
        if len(self.stack) == 1:
            # The top-level value stays on the stack for finish() to return.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in value position.  Scalars are stored and the
        parser moves to 'next_state'; '{' and '[' open a new container, whose
        own states take over until it is closed."""
        number_types = six.integer_types + (float,)
        # bool is a subclass of int, so this also covers True and False.
        if token is None or isinstance(token, number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and stops parsing.  Only the first error is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser until all of them are
        consumed or parsing is done.  Returns the number of characters
        consumed."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                # Advance only past consumed characters: one that ended a
                # number or keyword is presented again as the start of the
                # next token.
                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input,
        either because parsing completed or because of an error."""
        return self.done

    def finish(self):
        """Signals end of input.  Returns the parsed top-level value, or a
        string describing the error if the input was not valid."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending number or keyword by feeding a delimiter.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error