python/ovs/json.py

   1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at:
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 from __future__ import absolute_import
  16
  17 import functools
  18 import json
  19 import re
  20 import sys
  21
  22 import six
  23
# Names of the two parser implementations that PARSER can select.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'
# Use the C extension parser when it can be imported; otherwise fall back to
# the Parser class implemented in Python in this module.
try:
    import ovs._json
    PARSER = PARSER_C
except ImportError:
    PARSER = PARSER_PY

# NOTE(review): presumably a PyChecker directive silencing its string
# iteration warning for this module -- confirm.
__pychecker__ = 'no-stringiter'

# Indentation width passed to json.dumps when pretty-printing is requested.
SPACES_PER_LEVEL = 2
# json.dumps preconfigured with compact separators: no space follows ','
# or ':' in the output.
_dumper = functools.partial(json.dumps, separators=(",", ":"))

if six.PY2:
    def dumper(*args, **kwargs):
        """Serialize like _dumper, then decode the result to unicode.

        The 'raw-unicode-escape' codec is applied to the output of
        json.dumps.  NOTE(review): presumably this is meant to turn the
        escapes that json.dumps emits for non-ASCII characters back into
        the characters themselves -- confirm.
        """
        return _dumper(*args, **kwargs).decode('raw-unicode-escape')
else:
    # On Python 3 the preconfigured json.dumps is used unchanged.
    dumper = _dumper
  42
  43
  44 def to_stream(obj, stream, pretty=False, sort_keys=True):
  45     stream.write(dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
  46                         sort_keys=sort_keys))
  47
  48
  49 def to_file(obj, name, pretty=False, sort_keys=True):
  50     with open(name, "w") as stream:
  51         to_stream(obj, stream, pretty, sort_keys)
  52
  53
  54 def to_string(obj, pretty=False, sort_keys=True):
  55     return dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
  56                   sort_keys=sort_keys)
  57
  58
  59 def from_stream(stream):
  60     p = Parser(check_trailer=True)
  61     while True:
  62         buf = stream.read(4096)
  63         if buf == "" or p.feed(buf) != len(buf):
  64             break
  65     return p.finish()
  66
  67
  68 def from_file(name):
  69     stream = open(name, "r")
  70     try:
  71         return from_stream(stream)
  72     finally:
  73         stream.close()
  74
  75
  76 def from_string(s):
  77     if not isinstance(s, six.text_type):
  78         # We assume the input is a string.  We will only hit this case for a
  79         # str in Python 2 which is not unicode, so we need to go ahead and
  80         # decode it.
  81         try:
  82             s = six.text_type(s, 'utf-8')
  83         except UnicodeDecodeError as e:
  84             seq = ' '.join(["0x%2x" % ord(c)
  85                            for c in e.object[e.start:e.end] if ord(c) >= 0x80])
  86             return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
  87     p = Parser(check_trailer=True)
  88     p.feed(s)
  89     return p.finish()
  90
  91
  92 class Parser(object):
  93     # Maximum height of parsing stack. #
  94     MAX_HEIGHT = 1000
  95
  96     def __new__(cls, *args, **kwargs):
  97         if PARSER == PARSER_C:
  98             return ovs._json.Parser(*args, **kwargs)
  99         return super(Parser, cls).__new__(cls)
 100
 101     def __init__(self, check_trailer=False):
 102         self.check_trailer = check_trailer
 103
 104         # Lexical analysis.
 105         self.lex_state = Parser.__lex_start
 106         self.buffer = ""
 107         self.line_number = 0
 108         self.column_number = 0
 109         self.byte_number = 0
 110
 111         # Parsing.
 112         self.parse_state = Parser.__parse_start
 113         self.stack = []
 114         self.member_name = None
 115
 116         # Parse status.
 117         self.done = False
 118         self.error = None
 119
 120     def __lex_start_space(self, c):
 121         pass
 122
 123     def __lex_start_alpha(self, c):
 124         self.buffer = c
 125         self.lex_state = Parser.__lex_keyword
 126
 127     def __lex_start_token(self, c):
 128         self.__parser_input(c)
 129
 130     def __lex_start_number(self, c):
 131         self.buffer = c
 132         self.lex_state = Parser.__lex_number
 133
 134     def __lex_start_string(self, _):
 135         self.lex_state = Parser.__lex_string
 136
 137     def __lex_start_error(self, c):
 138         if ord(c) >= 32 and ord(c) < 128:
 139             self.__error("invalid character '%s'" % c)
 140         else:
 141             self.__error("invalid character U+%04x" % ord(c))
 142
 143     __lex_start_actions = {}
 144     for c in " \t\n\r":
 145         __lex_start_actions[c] = __lex_start_space
 146     for c in "abcdefghijklmnopqrstuvwxyz":
 147         __lex_start_actions[c] = __lex_start_alpha
 148     for c in "[{]}:,":
 149         __lex_start_actions[c] = __lex_start_token
 150     for c in "-0123456789":
 151         __lex_start_actions[c] = __lex_start_number
 152     __lex_start_actions['"'] = __lex_start_string
 153
 154     def __lex_start(self, c):
 155         Parser.__lex_start_actions.get(
 156             c, Parser.__lex_start_error)(self, c)
 157         return True
 158
 159     __lex_alpha = {}
 160     for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
 161         __lex_alpha[c] = True
 162
 163     def __lex_finish_keyword(self):
 164         if self.buffer == "false":
 165             self.__parser_input(False)
 166         elif self.buffer == "true":
 167             self.__parser_input(True)
 168         elif self.buffer == "null":
 169             self.__parser_input(None)
 170         else:
 171             self.__error("invalid keyword '%s'" % self.buffer)
 172
 173     def __lex_keyword(self, c):
 174         if c in Parser.__lex_alpha:
 175             self.buffer += c
 176             return True
 177         else:
 178             self.__lex_finish_keyword()
 179             return False
 180
 181     __number_re = re.compile("(-)?(0|[1-9][0-9]*)"
 182             "(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
 183
 184     def __lex_finish_number(self):
 185         s = self.buffer
 186         m = Parser.__number_re.match(s)
 187         if m:
 188             sign, integer, fraction, exp = m.groups()
 189             if (exp is not None and
 190                 (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
 191                 self.__error("exponent outside valid range")
 192                 return
 193
 194             if fraction is not None and len(fraction.lstrip('0')) == 0:
 195                 fraction = None
 196
 197             sig_string = integer
 198             if fraction is not None:
 199                 sig_string += fraction
 200             significand = int(sig_string)
 201
 202             pow10 = 0
 203             if fraction is not None:
 204                 pow10 -= len(fraction)
 205             if exp is not None:
 206                 pow10 += int(exp)
 207
 208             if significand == 0:
 209                 self.__parser_input(0)
 210                 return
 211             elif significand <= 2 ** 63:
 212                 while pow10 > 0 and significand <= 2 ** 63:
 213                     significand *= 10
 214                     pow10 -= 1
 215                 while pow10 < 0 and significand % 10 == 0:
 216                     significand //= 10
 217                     pow10 += 1
 218                 if (pow10 == 0 and
 219                     ((not sign and significand < 2 ** 63) or
 220                      (sign and significand <= 2 ** 63))):
 221                     if sign:
 222                         self.__parser_input(-significand)
 223                     else:
 224                         self.__parser_input(significand)
 225                     return
 226
 227             value = float(s)
 228             if value == float("inf") or value == float("-inf"):
 229                 self.__error("number outside valid range")
 230                 return
 231             if value == 0:
 232                 # Suppress negative zero.
 233                 value = 0
 234             self.__parser_input(value)
 235         elif re.match("-?0[0-9]", s):
 236             self.__error("leading zeros not allowed")
 237         elif re.match("-([^0-9]|$)", s):
 238             self.__error("'-' must be followed by digit")
 239         elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
 240             self.__error("decimal point must be followed by digit")
 241         elif re.search("e[-+]?([^0-9]|$)", s):
 242             self.__error("exponent must contain at least one digit")
 243         else:
 244             self.__error("syntax error in number")
 245
 246     def __lex_number(self, c):
 247         if c in ".0123456789eE-+":
 248             self.buffer += c
 249             return True
 250         else:
 251             self.__lex_finish_number()
 252             return False
 253
 254     __4hex_re = re.compile("[0-9a-fA-F]{4}")
 255
 256     def __lex_4hex(self, s):
 257         if len(s) < 4:
 258             self.__error("quoted string ends within \\u escape")
 259         elif not Parser.__4hex_re.match(s):
 260             self.__error("malformed \\u escape")
 261         elif s == "0000":
 262             self.__error("null bytes not supported in quoted strings")
 263         else:
 264             return int(s, 16)
 265
 266     @staticmethod
 267     def __is_leading_surrogate(c):
 268         """Returns true if 'c' is a Unicode code point for a leading
 269         surrogate."""
 270         return c >= 0xd800 and c <= 0xdbff
 271
 272     @staticmethod
 273     def __is_trailing_surrogate(c):
 274         """Returns true if 'c' is a Unicode code point for a trailing
 275         surrogate."""
 276         return c >= 0xdc00 and c <= 0xdfff
 277
 278     @staticmethod
 279     def __utf16_decode_surrogate_pair(leading, trailing):
 280         """Returns the unicode code point corresponding to leading surrogate
 281         'leading' and trailing surrogate 'trailing'.  The return value will not
 282         make any sense if 'leading' or 'trailing' are not in the correct ranges
 283         for leading or trailing surrogates."""
 284         #  Leading surrogate:         110110wwwwxxxxxx
 285         # Trailing surrogate:         110111xxxxxxxxxx
 286         #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
 287         w = (leading >> 6) & 0xf
 288         u = w + 1
 289         x0 = leading & 0x3f
 290         x1 = trailing & 0x3ff
 291         return (u << 16) | (x0 << 10) | x1
 292     __unescape = {'"': u'"',
 293                   "\\": u"\\",
 294                   "/": u"/",
 295                   "b": u"\b",
 296                   "f": u"\f",
 297                   "n": u"\n",
 298                   "r": u"\r",
 299                   "t": u"\t"}
 300
 301     def __lex_finish_string(self):
 302         inp = self.buffer
 303         out = u""
 304         while len(inp):
 305             backslash = inp.find('\\')
 306             if backslash == -1:
 307                 out += inp
 308                 break
 309             out += inp[:backslash]
 310             inp = inp[backslash + 1:]
 311             if inp == "":
 312                 self.__error("quoted string may not end with backslash")
 313                 return
 314
 315             replacement = Parser.__unescape.get(inp[0])
 316             if replacement is not None:
 317                 out += replacement
 318                 inp = inp[1:]
 319                 continue
 320             elif inp[0] != u'u':
 321                 self.__error("bad escape \\%s" % inp[0])
 322                 return
 323
 324             c0 = self.__lex_4hex(inp[1:5])
 325             if c0 is None:
 326                 return
 327             inp = inp[5:]
 328
 329             if Parser.__is_leading_surrogate(c0):
 330                 if inp[:2] != u'\\u':
 331                     self.__error("malformed escaped surrogate pair")
 332                     return
 333                 c1 = self.__lex_4hex(inp[2:6])
 334                 if c1 is None:
 335                     return
 336                 if not Parser.__is_trailing_surrogate(c1):
 337                     self.__error("second half of escaped surrogate pair is "
 338                                  "not trailing surrogate")
 339                     return
 340                 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
 341                 inp = inp[6:]
 342             else:
 343                 code_point = c0
 344             out += six.unichr(code_point)
 345         self.__parser_input('string', out)
 346
 347     def __lex_string_escape(self, c):
 348         self.buffer += c
 349         self.lex_state = Parser.__lex_string
 350         return True
 351
 352     def __lex_string(self, c):
 353         if c == '\\':
 354             self.buffer += c
 355             self.lex_state = Parser.__lex_string_escape
 356         elif c == '"':
 357             self.__lex_finish_string()
 358         elif ord(c) >= 0x20:
 359             self.buffer += c
 360         else:
 361             self.__error("U+%04X must be escaped in quoted string" % ord(c))
 362         return True
 363
 364     def __lex_input(self, c):
 365         eat = self.lex_state(self, c)
 366         assert eat is True or eat is False
 367         return eat
 368
 369     def __parse_start(self, token, unused_string):
 370         if token == '{':
 371             self.__push_object()
 372         elif token == '[':
 373             self.__push_array()
 374         else:
 375             self.__error("syntax error at beginning of input")
 376
 377     def __parse_end(self, unused_token, unused_string):
 378         self.__error("trailing garbage at end of input")
 379
 380     def __parse_object_init(self, token, string):
 381         if token == '}':
 382             self.__parser_pop()
 383         else:
 384             self.__parse_object_name(token, string)
 385
 386     def __parse_object_name(self, token, string):
 387         if token == 'string':
 388             self.member_name = string
 389             self.parse_state = Parser.__parse_object_colon
 390         else:
 391             self.__error("syntax error parsing object expecting string")
 392
 393     def __parse_object_colon(self, token, unused_string):
 394         if token == ":":
 395             self.parse_state = Parser.__parse_object_value
 396         else:
 397             self.__error("syntax error parsing object expecting ':'")
 398
 399     def __parse_object_value(self, token, string):
 400         self.__parse_value(token, string, Parser.__parse_object_next)
 401
 402     def __parse_object_next(self, token, unused_string):
 403         if token == ",":
 404             self.parse_state = Parser.__parse_object_name
 405         elif token == "}":
 406             self.__parser_pop()
 407         else:
 408             self.__error("syntax error expecting '}' or ','")
 409
 410     def __parse_array_init(self, token, string):
 411         if token == ']':
 412             self.__parser_pop()
 413         else:
 414             self.__parse_array_value(token, string)
 415
 416     def __parse_array_value(self, token, string):
 417         self.__parse_value(token, string, Parser.__parse_array_next)
 418
 419     def __parse_array_next(self, token, unused_string):
 420         if token == ",":
 421             self.parse_state = Parser.__parse_array_value
 422         elif token == "]":
 423             self.__parser_pop()
 424         else:
 425             self.__error("syntax error expecting ']' or ','")
 426
 427     def __parser_input(self, token, string=None):
 428         self.lex_state = Parser.__lex_start
 429         self.buffer = ""
 430         self.parse_state(self, token, string)
 431
 432     def __put_value(self, value):
 433         top = self.stack[-1]
 434         if isinstance(top, dict):
 435             top[self.member_name] = value
 436         else:
 437             top.append(value)
 438
 439     def __parser_push(self, new_json, next_state):
 440         if len(self.stack) < Parser.MAX_HEIGHT:
 441             if len(self.stack) > 0:
 442                 self.__put_value(new_json)
 443             self.stack.append(new_json)
 444             self.parse_state = next_state
 445         else:
 446             self.__error("input exceeds maximum nesting depth %d" %
 447                          Parser.MAX_HEIGHT)
 448
 449     def __push_object(self):
 450         self.__parser_push({}, Parser.__parse_object_init)
 451
 452     def __push_array(self):
 453         self.__parser_push([], Parser.__parse_array_init)
 454
 455     def __parser_pop(self):
 456         if len(self.stack) == 1:
 457             self.parse_state = Parser.__parse_end
 458             if not self.check_trailer:
 459                 self.done = True
 460         else:
 461             self.stack.pop()
 462             top = self.stack[-1]
 463             if isinstance(top, list):
 464                 self.parse_state = Parser.__parse_array_next
 465             else:
 466                 self.parse_state = Parser.__parse_object_next
 467
 468     def __parse_value(self, token, string, next_state):
 469         number_types = list(six.integer_types)
 470         number_types.extend([float])
 471         number_types = tuple(number_types)
 472         if token in [False, None, True] or isinstance(token, number_types):
 473             self.__put_value(token)
 474         elif token == 'string':
 475             self.__put_value(string)
 476         else:
 477             if token == '{':
 478                 self.__push_object()
 479             elif token == '[':
 480                 self.__push_array()
 481             else:
 482                 self.__error("syntax error expecting value")
 483             return
 484         self.parse_state = next_state
 485
 486     def __error(self, message):
 487         if self.error is None:
 488             self.error = ("line %d, column %d, byte %d: %s"
 489                           % (self.line_number, self.column_number,
 490                              self.byte_number, message))
 491             self.done = True
 492
 493     def feed(self, s):
 494         i = 0
 495         while True:
 496             if self.done or i >= len(s):
 497                 return i
 498
 499             c = s[i]
 500             if self.__lex_input(c):
 501                 self.byte_number += 1
 502                 if c == '\n':
 503                     self.column_number = 0
 504                     self.line_number += 1
 505                 else:
 506                     self.column_number += 1
 507
 508                 i += 1
 509
 510     def is_done(self):
 511         return self.done
 512
 513     def finish(self):
 514         if self.lex_state == Parser.__lex_start:
 515             pass
 516         elif self.lex_state in (Parser.__lex_string,
 517                                 Parser.__lex_string_escape):
 518             self.__error("unexpected end of input in quoted string")
 519         else:
 520             self.__lex_input(" ")
 521
 522         if self.parse_state == Parser.__parse_start:
 523             self.__error("empty input stream")
 524         elif self.parse_state != Parser.__parse_end:
 525             self.__error("unexpected end of input")
 526
 527         if self.error is None:
 528             assert len(self.stack) == 1
 529             return self.stack.pop()
 530         else:
 531             return self.error