python/ovs/json.py

   1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at:
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 from __future__ import absolute_import
  16
  17 import functools
  18 import json
  19 import re
  20 import sys
  21
# Identifiers for the two parser implementations that PARSER can name.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'
# Pick the implementation at import time: ovs._json when it can be imported
# (presumably a compiled extension, given the 'C' label -- TODO confirm),
# otherwise the Parser class implemented in this module.
try:
    import ovs._json
    PARSER = PARSER_C
except ImportError:
    PARSER = PARSER_PY

# NOTE(review): looks like a directive for the pychecker lint tool -- confirm.
__pychecker__ = 'no-stringiter'

# Indentation width, in spaces, used when pretty-printing.
SPACES_PER_LEVEL = 2
# json.dumps preconfigured with compact separators (no space after ',' or
# ':'); callers add 'indent' and 'sort_keys' per call.
dumper = functools.partial(json.dumps, separators=(",", ":"))
  34
  35
  36 def to_stream(obj, stream, pretty=False, sort_keys=True):
  37     stream.write(dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
  38                         sort_keys=sort_keys))
  39
  40
  41 def to_file(obj, name, pretty=False, sort_keys=True):
  42     with open(name, "w") as stream:
  43         to_stream(obj, stream, pretty, sort_keys)
  44
  45
  46 def to_string(obj, pretty=False, sort_keys=True):
  47     return dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
  48                   sort_keys=sort_keys)
  49
  50
  51 def from_stream(stream):
  52     p = Parser(check_trailer=True)
  53     while True:
  54         buf = stream.read(4096)
  55         if buf == "" or p.feed(buf) != len(buf):
  56             break
  57     return p.finish()
  58
  59
  60 def from_file(name):
  61     stream = open(name, "r")
  62     try:
  63         return from_stream(stream)
  64     finally:
  65         stream.close()
  66
  67
  68 def from_string(s):
  69     if not isinstance(s, str):
  70         # We assume the input is a string.  We will only hit this case for a
  71         # str in Python 2 which is not unicode, so we need to go ahead and
  72         # decode it.
  73         try:
  74             s = str(s, 'utf-8')
  75         except UnicodeDecodeError as e:
  76             seq = ' '.join(["0x%2x" % ord(c)
  77                            for c in e.object[e.start:e.end] if ord(c) >= 0x80])
  78             return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
  79     p = Parser(check_trailer=True)
  80     p.feed(s)
  81     return p.finish()
  82
  83
  84 class Parser(object):
  85     # Maximum height of parsing stack. #
  86     MAX_HEIGHT = 1000
  87
  88     def __new__(cls, *args, **kwargs):
  89         if PARSER == PARSER_C:
  90             return ovs._json.Parser(*args, **kwargs)
  91         return super(Parser, cls).__new__(cls)
  92
  93     def __init__(self, check_trailer=False):
  94         self.check_trailer = check_trailer
  95
  96         # Lexical analysis.
  97         self.lex_state = Parser.__lex_start
  98         self.buffer = ""
  99         self.line_number = 0
 100         self.column_number = 0
 101         self.byte_number = 0
 102
 103         # Parsing.
 104         self.parse_state = Parser.__parse_start
 105         self.stack = []
 106         self.member_name = None
 107
 108         # Parse status.
 109         self.done = False
 110         self.error = None
 111
 112     def __lex_start_space(self, c):
 113         pass
 114
 115     def __lex_start_alpha(self, c):
 116         self.buffer = c
 117         self.lex_state = Parser.__lex_keyword
 118
 119     def __lex_start_token(self, c):
 120         self.__parser_input(c)
 121
 122     def __lex_start_number(self, c):
 123         self.buffer = c
 124         self.lex_state = Parser.__lex_number
 125
 126     def __lex_start_string(self, _):
 127         self.lex_state = Parser.__lex_string
 128
 129     def __lex_start_error(self, c):
 130         if ord(c) >= 32 and ord(c) < 128:
 131             self.__error("invalid character '%s'" % c)
 132         else:
 133             self.__error("invalid character U+%04x" % ord(c))
 134
 135     __lex_start_actions = {}
 136     for c in " \t\n\r":
 137         __lex_start_actions[c] = __lex_start_space
 138     for c in "abcdefghijklmnopqrstuvwxyz":
 139         __lex_start_actions[c] = __lex_start_alpha
 140     for c in "[{]}:,":
 141         __lex_start_actions[c] = __lex_start_token
 142     for c in "-0123456789":
 143         __lex_start_actions[c] = __lex_start_number
 144     __lex_start_actions['"'] = __lex_start_string
 145
 146     def __lex_start(self, c):
 147         Parser.__lex_start_actions.get(
 148             c, Parser.__lex_start_error)(self, c)
 149         return True
 150
 151     __lex_alpha = {}
 152     for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
 153         __lex_alpha[c] = True
 154
 155     def __lex_finish_keyword(self):
 156         if self.buffer == "false":
 157             self.__parser_input(False)
 158         elif self.buffer == "true":
 159             self.__parser_input(True)
 160         elif self.buffer == "null":
 161             self.__parser_input(None)
 162         else:
 163             self.__error("invalid keyword '%s'" % self.buffer)
 164
 165     def __lex_keyword(self, c):
 166         if c in Parser.__lex_alpha:
 167             self.buffer += c
 168             return True
 169         else:
 170             self.__lex_finish_keyword()
 171             return False
 172
 173     __number_re = re.compile("(-)?(0|[1-9][0-9]*)"
 174             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
 175
 176     def __lex_finish_number(self):
 177         s = self.buffer
 178         m = Parser.__number_re.match(s)
 179         if m:
 180             sign, integer, fraction, exp = m.groups()
 181             if (exp is not None and
 182                 (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
 183                 self.__error("exponent outside valid range")
 184                 return
 185
 186             if fraction is not None and len(fraction.lstrip('0')) == 0:
 187                 fraction = None
 188
 189             sig_string = integer
 190             if fraction is not None:
 191                 sig_string += fraction
 192             significand = int(sig_string)
 193
 194             pow10 = 0
 195             if fraction is not None:
 196                 pow10 -= len(fraction)
 197             if exp is not None:
 198                 pow10 += int(exp)
 199
 200             if significand == 0:
 201                 self.__parser_input(0)
 202                 return
 203             elif significand <= 2 ** 63:
 204                 while pow10 > 0 and significand <= 2 ** 63:
 205                     significand *= 10
 206                     pow10 -= 1
 207                 while pow10 < 0 and significand % 10 == 0:
 208                     significand //= 10
 209                     pow10 += 1
 210                 if (pow10 == 0 and
 211                     ((not sign and significand < 2 ** 63) or
 212                      (sign and significand <= 2 ** 63))):
 213                     if sign:
 214                         self.__parser_input(-significand)
 215                     else:
 216                         self.__parser_input(significand)
 217                     return
 218
 219             value = float(s)
 220             if value == float("inf") or value == float("-inf"):
 221                 self.__error("number outside valid range")
 222                 return
 223             if value == 0:
 224                 # Suppress negative zero.
 225                 value = 0
 226             self.__parser_input(value)
 227         elif re.match("-?0[0-9]", s):
 228             self.__error("leading zeros not allowed")
 229         elif re.match("-([^0-9]|$)", s):
 230             self.__error("'-' must be followed by digit")
 231         elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
 232             self.__error("decimal point must be followed by digit")
 233         elif re.search("e[-+]?([^0-9]|$)", s):
 234             self.__error("exponent must contain at least one digit")
 235         else:
 236             self.__error("syntax error in number")
 237
 238     def __lex_number(self, c):
 239         if c in ".0123456789eE-+":
 240             self.buffer += c
 241             return True
 242         else:
 243             self.__lex_finish_number()
 244             return False
 245
 246     __4hex_re = re.compile("[0-9a-fA-F]{4}")
 247
 248     def __lex_4hex(self, s):
 249         if len(s) < 4:
 250             self.__error("quoted string ends within \\u escape")
 251         elif not Parser.__4hex_re.match(s):
 252             self.__error("malformed \\u escape")
 253         elif s == "0000":
 254             self.__error("null bytes not supported in quoted strings")
 255         else:
 256             return int(s, 16)
 257
 258     @staticmethod
 259     def __is_leading_surrogate(c):
 260         """Returns true if 'c' is a Unicode code point for a leading
 261         surrogate."""
 262         return c >= 0xd800 and c <= 0xdbff
 263
 264     @staticmethod
 265     def __is_trailing_surrogate(c):
 266         """Returns true if 'c' is a Unicode code point for a trailing
 267         surrogate."""
 268         return c >= 0xdc00 and c <= 0xdfff
 269
 270     @staticmethod
 271     def __utf16_decode_surrogate_pair(leading, trailing):
 272         """Returns the unicode code point corresponding to leading surrogate
 273         'leading' and trailing surrogate 'trailing'.  The return value will not
 274         make any sense if 'leading' or 'trailing' are not in the correct ranges
 275         for leading or trailing surrogates."""
 276         #  Leading surrogate:         110110wwwwxxxxxx
 277         # Trailing surrogate:         110111xxxxxxxxxx
 278         #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
 279         w = (leading >> 6) & 0xf
 280         u = w + 1
 281         x0 = leading & 0x3f
 282         x1 = trailing & 0x3ff
 283         return (u << 16) | (x0 << 10) | x1
 284     __unescape = {'"': u'"',
 285                   "\\": u"\\",
 286                   "/": u"/",
 287                   "b": u"\b",
 288                   "f": u"\f",
 289                   "n": u"\n",
 290                   "r": u"\r",
 291                   "t": u"\t"}
 292
 293     def __lex_finish_string(self):
 294         inp = self.buffer
 295         out = u""
 296         while len(inp):
 297             backslash = inp.find('\\')
 298             if backslash == -1:
 299                 out += inp
 300                 break
 301             out += inp[:backslash]
 302             inp = inp[backslash + 1:]
 303             if inp == "":
 304                 self.__error("quoted string may not end with backslash")
 305                 return
 306
 307             replacement = Parser.__unescape.get(inp[0])
 308             if replacement is not None:
 309                 out += replacement
 310                 inp = inp[1:]
 311                 continue
 312             elif inp[0] != u'u':
 313                 self.__error("bad escape \\%s" % inp[0])
 314                 return
 315
 316             c0 = self.__lex_4hex(inp[1:5])
 317             if c0 is None:
 318                 return
 319             inp = inp[5:]
 320
 321             if Parser.__is_leading_surrogate(c0):
 322                 if inp[:2] != u'\\u':
 323                     self.__error("malformed escaped surrogate pair")
 324                     return
 325                 c1 = self.__lex_4hex(inp[2:6])
 326                 if c1 is None:
 327                     return
 328                 if not Parser.__is_trailing_surrogate(c1):
 329                     self.__error("second half of escaped surrogate pair is "
 330                                  "not trailing surrogate")
 331                     return
 332                 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
 333                 inp = inp[6:]
 334             else:
 335                 code_point = c0
 336             out += chr(code_point)
 337         self.__parser_input('string', out)
 338
 339     def __lex_string_escape(self, c):
 340         self.buffer += c
 341         self.lex_state = Parser.__lex_string
 342         return True
 343
 344     def __lex_string(self, c):
 345         if c == '\\':
 346             self.buffer += c
 347             self.lex_state = Parser.__lex_string_escape
 348         elif c == '"':
 349             self.__lex_finish_string()
 350         elif ord(c) >= 0x20:
 351             self.buffer += c
 352         else:
 353             self.__error("U+%04X must be escaped in quoted string" % ord(c))
 354         return True
 355
 356     def __lex_input(self, c):
 357         eat = self.lex_state(self, c)
 358         assert eat is True or eat is False
 359         return eat
 360
 361     def __parse_start(self, token, unused_string):
 362         if token == '{':
 363             self.__push_object()
 364         elif token == '[':
 365             self.__push_array()
 366         else:
 367             self.__error("syntax error at beginning of input")
 368
 369     def __parse_end(self, unused_token, unused_string):
 370         self.__error("trailing garbage at end of input")
 371
 372     def __parse_object_init(self, token, string):
 373         if token == '}':
 374             self.__parser_pop()
 375         else:
 376             self.__parse_object_name(token, string)
 377
 378     def __parse_object_name(self, token, string):
 379         if token == 'string':
 380             self.member_name = string
 381             self.parse_state = Parser.__parse_object_colon
 382         else:
 383             self.__error("syntax error parsing object expecting string")
 384
 385     def __parse_object_colon(self, token, unused_string):
 386         if token == ":":
 387             self.parse_state = Parser.__parse_object_value
 388         else:
 389             self.__error("syntax error parsing object expecting ':'")
 390
 391     def __parse_object_value(self, token, string):
 392         self.__parse_value(token, string, Parser.__parse_object_next)
 393
 394     def __parse_object_next(self, token, unused_string):
 395         if token == ",":
 396             self.parse_state = Parser.__parse_object_name
 397         elif token == "}":
 398             self.__parser_pop()
 399         else:
 400             self.__error("syntax error expecting '}' or ','")
 401
 402     def __parse_array_init(self, token, string):
 403         if token == ']':
 404             self.__parser_pop()
 405         else:
 406             self.__parse_array_value(token, string)
 407
 408     def __parse_array_value(self, token, string):
 409         self.__parse_value(token, string, Parser.__parse_array_next)
 410
 411     def __parse_array_next(self, token, unused_string):
 412         if token == ",":
 413             self.parse_state = Parser.__parse_array_value
 414         elif token == "]":
 415             self.__parser_pop()
 416         else:
 417             self.__error("syntax error expecting ']' or ','")
 418
 419     def __parser_input(self, token, string=None):
 420         self.lex_state = Parser.__lex_start
 421         self.buffer = ""
 422         self.parse_state(self, token, string)
 423
 424     def __put_value(self, value):
 425         top = self.stack[-1]
 426         if isinstance(top, dict):
 427             top[self.member_name] = value
 428         else:
 429             top.append(value)
 430
 431     def __parser_push(self, new_json, next_state):
 432         if len(self.stack) < Parser.MAX_HEIGHT:
 433             if len(self.stack) > 0:
 434                 self.__put_value(new_json)
 435             self.stack.append(new_json)
 436             self.parse_state = next_state
 437         else:
 438             self.__error("input exceeds maximum nesting depth %d" %
 439                          Parser.MAX_HEIGHT)
 440
 441     def __push_object(self):
 442         self.__parser_push({}, Parser.__parse_object_init)
 443
 444     def __push_array(self):
 445         self.__parser_push([], Parser.__parse_array_init)
 446
 447     def __parser_pop(self):
 448         if len(self.stack) == 1:
 449             self.parse_state = Parser.__parse_end
 450             if not self.check_trailer:
 451                 self.done = True
 452         else:
 453             self.stack.pop()
 454             top = self.stack[-1]
 455             if isinstance(top, list):
 456                 self.parse_state = Parser.__parse_array_next
 457             else:
 458                 self.parse_state = Parser.__parse_object_next
 459
 460     def __parse_value(self, token, string, next_state):
 461         number_types = [int]
 462         number_types.extend([float])
 463         number_types = tuple(number_types)
 464         if token in [False, None, True] or isinstance(token, number_types):
 465             self.__put_value(token)
 466         elif token == 'string':
 467             self.__put_value(string)
 468         else:
 469             if token == '{':
 470                 self.__push_object()
 471             elif token == '[':
 472                 self.__push_array()
 473             else:
 474                 self.__error("syntax error expecting value")
 475             return
 476         self.parse_state = next_state
 477
 478     def __error(self, message):
 479         if self.error is None:
 480             self.error = ("line %d, column %d, byte %d: %s"
 481                           % (self.line_number, self.column_number,
 482                              self.byte_number, message))
 483             self.done = True
 484
 485     def feed(self, s):
 486         i = 0
 487         while True:
 488             if self.done or i >= len(s):
 489                 return i
 490
 491             c = s[i]
 492             if self.__lex_input(c):
 493                 self.byte_number += 1
 494                 if c == '\n':
 495                     self.column_number = 0
 496                     self.line_number += 1
 497                 else:
 498                     self.column_number += 1
 499
 500                 i += 1
 501
 502     def is_done(self):
 503         return self.done
 504
 505     def finish(self):
 506         if self.lex_state == Parser.__lex_start:
 507             pass
 508         elif self.lex_state in (Parser.__lex_string,
 509                                 Parser.__lex_string_escape):
 510             self.__error("unexpected end of input in quoted string")
 511         else:
 512             self.__lex_input(" ")
 513
 514         if self.parse_state == Parser.__parse_start:
 515             self.__error("empty input stream")
 516         elif self.parse_state != Parser.__parse_end:
 517             self.__error("unexpected end of input")
 518
 519         if self.error is None:
 520             assert len(self.stack) == 1
 521             return self.stack.pop()
 522         else:
 523             return self.error