[mirror_ovs.git] / python / ovs / json.py

# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import sys

import six
from six.moves import range

__pychecker__ = 'no-stringiter'

# Maps a character's code point to the escape sequence written for it when
# serializing a JSON string.  The characters with a short JSON escape are
# listed explicitly; every other control character below U+0020 is filled in
# afterwards in \uXXXX form.
escapes = {
    ord('"'): u"\\\"",
    ord("\\"): u"\\\\",
    ord("\b"): u"\\b",
    ord("\f"): u"\\f",
    ord("\n"): u"\\n",
    ord("\r"): u"\\r",
    ord("\t"): u"\\t",
}
for esc in range(32):
    escapes.setdefault(esc, u"\\u%04x" % esc)

# Number of spaces of indentation per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2


class _Serializer(object):
    """Writes Python values to a stream as JSON text.

    'stream' is any object with a write() method.  If 'pretty' is true,
    containers are broken across lines and indented by SPACES_PER_LEVEL
    spaces per nesting level.  If 'sort_keys' is true, object members are
    written in sorted order.
    """

    def __init__(self, stream, pretty, sort_keys):
        self.stream = stream
        self.pretty = pretty
        self.sort_keys = sort_keys
        # Current nesting level, used only for pretty-printing.
        self.depth = 0

    def __serialize_string(self, s):
        # Write 's' as a quoted JSON string, escaping whatever 'escapes'
        # has an entry for.
        escaped = ''.join(escapes.get(ord(ch), ch) for ch in s)
        self.stream.write(u'"%s"' % escaped)

    def __indent_line(self):
        # Start a new line indented to the current depth (pretty mode only).
        if not self.pretty:
            return
        self.stream.write('\n')
        self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))

    def __serialize_object(self, obj):
        # Write dict 'obj' as a JSON object.  Keys are converted to text.
        self.stream.write(u"{")

        self.depth += 1
        self.__indent_line()

        if self.sort_keys:
            members = sorted(obj.items())
        else:
            members = six.iteritems(obj)
        for position, (name, member) in enumerate(members):
            if position:
                self.stream.write(u",")
                self.__indent_line()
            self.__serialize_string(six.text_type(name))
            self.stream.write(u":")
            if self.pretty:
                self.stream.write(u' ')
            self.serialize(member)

        # The closing brace goes directly after the last member.
        self.stream.write(u"}")
        self.depth -= 1

    def __serialize_array(self, obj):
        # Write list or tuple 'obj' as a JSON array.
        self.stream.write(u"[")
        self.depth += 1

        if obj:
            self.__indent_line()

            for position, element in enumerate(obj):
                if position:
                    self.stream.write(u",")
                    self.__indent_line()
                self.serialize(element)

        self.depth -= 1
        self.stream.write(u"]")

    def serialize(self, obj):
        """Writes 'obj' to the stream as JSON.

        Supported values are None, booleans, integers, floats, strings,
        dicts, lists, and tuples.  Raises Exception for anything else.
        """
        # The singletons are tested by identity, and before the integer
        # check, so that True and False are not written as 1 and 0.
        for singleton, text in ((None, u"null"),
                                (False, u"false"),
                                (True, u"true")):
            if obj is singleton:
                self.stream.write(text)
                return

        if isinstance(obj, six.integer_types):
            self.stream.write(u"%d" % obj)
            return
        if isinstance(obj, float):
            self.stream.write("%.15g" % obj)
            return
        if isinstance(obj, six.text_type):
            # unicode on Python 2, str on Python 3.
            self.__serialize_string(obj)
            return
        if isinstance(obj, str):
            # Python 2 byte string: convert to unicode first.  (On Python 3
            # the previous test already caught every str.)
            self.__serialize_string(six.text_type(obj))
            return
        if isinstance(obj, dict):
            self.__serialize_object(obj)
            return
        if isinstance(obj, (list, tuple)):
            self.__serialize_array(obj)
            return

        raise Exception("can't serialize %s as JSON" % obj)


def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Writes 'obj' to 'stream' as JSON text.

    'pretty' selects indented multi-line output and 'sort_keys' selects
    sorted object members.
    """
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)


def to_file(obj, name, pretty=False, sort_keys=True):
    """Writes 'obj' as JSON text to the file named 'name'.

    The file is opened for writing in text mode and is closed again whether
    or not serialization succeeds.
    """
    with open(name, "w") as stream:
        to_stream(obj, stream, pretty, sort_keys)


def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    'pretty' and 'sort_keys' have the same meaning as for to_stream().
    """
    buf = six.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text


def from_stream(stream):
    """Parses the JSON text read from 'stream'.

    'stream' is read in 4096-character chunks until it is exhausted or the
    parser stops consuming input.  Returns the parsed value (a dict or a
    list) on success, or an error message string on failure.
    """
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # An empty read means end of input.  Test truthiness rather than
        # comparing against "" so that the b"" returned by a binary stream
        # is recognized too, instead of looping forever.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()


def from_file(name):
    """Parses the JSON text in the file named 'name'.

    The file is opened for reading in text mode and is closed again whether
    or not parsing succeeds.  Returns whatever from_stream() returns.
    """
    with open(name, "r") as stream:
        return from_stream(stream)


def from_string(s):
    """Parses the JSON text in 's'.

    's' may be a text string or a UTF-8 encoded byte string.  Returns the
    parsed value (a dict or a list) on success, or an error message string
    on failure, including when 's' is not valid UTF-8.
    """
    if not isinstance(s, six.text_type):
        # We assume the input is a byte string (str on Python 2, bytes on
        # Python 3), so we need to go ahead and decode it.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # bytearray yields integers on both Python 2 and Python 3,
            # whereas iterating the slice directly yields one-character
            # strings on Python 2 but integers on Python 3.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join("0x%2x" % byte for byte in bad if byte >= 0x80)
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()


class Parser(object):
    """Incremental JSON parser.

    Pass input to feed(), in as many pieces as convenient, then call
    finish() to obtain either the parsed value (a dict or a list) or, on
    failure, an error message string.  Only an object or an array is
    accepted as the top-level value.

    If 'check_trailer' is false, parsing is done (is_done() returns true) as
    soon as the top-level value is complete, and feed() leaves any following
    input unconsumed.  If 'check_trailer' is true, input keeps being
    consumed after the top-level value and anything other than white space
    there is reported as an error.
    """

    # Maximum height of the parsing stack, that is, the deepest nesting of
    # objects and arrays that is accepted.
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer, start state: one handler per class of first character.  The
    # handlers are looked up in __lex_start_actions by __lex_start().

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Lexer, keywords.

    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            # 'c' ends the keyword and is not part of it, so it is not
            # consumed here.
            self.__lex_finish_keyword()
            return False

    # Lexer, numbers.

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        # Converts the number text in self.buffer to a value and passes it
        # to the parser.  A number that is an exact integer in the range
        # -2**63...2**63-1 becomes an integer, whichever way it was
        # written; anything else becomes a float.
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            # sys.maxsize rather than sys.maxint, which Python 3 lacks.
            if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            # The number's magnitude is significand * 10**pow10.
            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to bring pow10 to zero without losing precision.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Floor division keeps the significand an integer on
                    # Python 3, where "/" would produce a float.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"[eE][-+]?([^0-9]|$)", s):
            # Either case of the exponent marker is accepted by
            # __number_re, so either case is diagnosed here.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            # 'c' ends the number and is not part of it, so it is not
            # consumed here.
            self.__lex_finish_number()
            return False

    # Lexer, strings.

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        # Returns the value of the four hex digits in 's', or reports an
        # error and returns None.
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Maps the character after a backslash to the character it stands for.
    # "\u" escapes are handled separately.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        # Resolves the escape sequences in self.buffer, which holds the raw
        # text between the quotes, and passes the result to the parser.
        inp = self.buffer
        pieces = []
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                pieces.append(inp)
                break
            pieces.append(inp[:backslash])
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                pieces.append(replacement)
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            # six.unichr is unichr on Python 2 and chr on Python 3, which
            # has no unichr builtin.
            pieces.append(six.unichr(code_point))
        self.__parser_input('string', u"".join(pieces))

    def __lex_string_escape(self, c):
        # The character after a backslash is kept as is, so that an escaped
        # quote does not end the string.
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        # Passes 'c' to the current lexer state.  Returns True if 'c' was
        # consumed, False if it must be presented again.
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes a token and, for 'string' tokens, the
    # string's value.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        # Passes a complete token from the lexer to the parser and resets
        # the lexer for the next token.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        # Adds 'value' to the container on top of the stack.
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        if len(self.stack) == 1:
            # The top-level value is complete.  It stays on the stack for
            # finish() to return.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        # Handles a token in a position where a value is expected, then
        # moves to 'next_state' unless the token opened a container or was
        # an error.
        if token == 'string':
            self.__put_value(string)
        elif token == '{':
            self.__push_object()
            return
        elif token == '[':
            self.__push_array()
            return
        elif (token in (False, None, True)
              or isinstance(token, float)
              or isinstance(token, six.integer_types)):
            self.__put_value(token)
        else:
            self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        # Records the first error only, with the input position, and stops
        # the parser.
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Passes the characters in 's' to the parser.

        Returns the number of characters consumed, which is less than
        len(s) if parsing finished or failed before the end of 's'.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character that ends a keyword or number is not consumed by
            # that state; it is presented again in the start state.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns true if parsing is complete or has failed, in which case
        feed() will not consume any more input."""
        return self.done

    def finish(self):
        """Ends parsing and returns the result.

        Returns the parsed value (a dict or a list) if the input was valid,
        otherwise an error message string.
        """
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A keyword or number is still pending.  A space terminates it.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error
Commit	Line	Data
e0edde6f	1	# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
99155935 BP	2	#
	3	# Licensed under the Apache License, Version 2.0 (the "License");
	4	# you may not use this file except in compliance with the License.
	5	# You may obtain a copy of the License at:
	6	#
	7	# http://www.apache.org/licenses/LICENSE-2.0
	8	#
	9	# Unless required by applicable law or agreed to in writing, software
	10	# distributed under the License is distributed on an "AS IS" BASIS,
	11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	12	# See the License for the specific language governing permissions and
	13	# limitations under the License.
	14
	15	import re
99155935 BP	16	import sys
99155935 BP	17
cb96c1b2	18	import six
b3ac2947 RB	19	from six.moves import range
b3ac2947 RB	20
26bb0f31 EJ	21	__pychecker__ = 'no-stringiter'
26bb0f31 EJ	22
99155935 BP	23	escapes = {ord('"'): u"\\\"",
	24	ord("\\"): u"\\\\",
	25	ord("\b"): u"\\b",
	26	ord("\f"): u"\\f",
	27	ord("\n"): u"\\n",
	28	ord("\r"): u"\\r",
	29	ord("\t"): u"\\t"}
9b46cccc BP	30	for esc in range(32):
	31	if esc not in escapes:
	32	escapes[esc] = u"\\u%04x" % esc
99155935	33
cba64103	34	SPACES_PER_LEVEL = 2
99155935	35
cba64103 BP	36
	37	class _Serializer(object):
	38	def __init__(self, stream, pretty, sort_keys):
	39	self.stream = stream
	40	self.pretty = pretty
	41	self.sort_keys = sort_keys
	42	self.depth = 0
	43
	44	def __serialize_string(self, s):
	45	self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
	46
	47	def __indent_line(self):
	48	if self.pretty:
	49	self.stream.write('\n')
	50	self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
	51
	52	def serialize(self, obj):
	53	if obj is None:
	54	self.stream.write(u"null")
	55	elif obj is False:
	56	self.stream.write(u"false")
	57	elif obj is True:
	58	self.stream.write(u"true")
8f808842	59	elif isinstance(obj, six.integer_types):
cba64103	60	self.stream.write(u"%d" % obj)
8f808842	61	elif isinstance(obj, float):
cba64103	62	self.stream.write("%.15g" % obj)
25f599fb RB	63	elif isinstance(obj, six.text_type):
25f599fb RB	64	# unicode() on Python 2, or str() in Python 3 (always unicode)
cba64103	65	self.__serialize_string(obj)
8f808842	66	elif isinstance(obj, str):
25f599fb RB	67	# This is for Python 2, where this comes out to unicode(str()).
	68	# For Python 3, it's str(str()), but it's harmless.
	69	self.__serialize_string(six.text_type(obj))
8f808842	70	elif isinstance(obj, dict):
cba64103 BP	71	self.stream.write(u"{")
	72
	73	self.depth += 1
	74	self.__indent_line()
	75
	76	if self.sort_keys:
	77	items = sorted(obj.items())
	78	else:
cb96c1b2	79	items = six.iteritems(obj)
cba64103 BP	80	for i, (key, value) in enumerate(items):
	81	if i > 0:
	82	self.stream.write(u",")
	83	self.__indent_line()
25f599fb	84	self.__serialize_string(six.text_type(key))
cba64103 BP	85	self.stream.write(u":")
	86	if self.pretty:
	87	self.stream.write(u' ')
	88	self.serialize(value)
	89
	90	self.stream.write(u"}")
	91	self.depth -= 1
8f808842	92	elif isinstance(obj, (list, tuple)):
cba64103 BP	93	self.stream.write(u"[")
	94	self.depth += 1
	95
	96	if obj:
	97	self.__indent_line()
	98
	99	for i, value in enumerate(obj):
	100	if i > 0:
	101	self.stream.write(u",")
	102	self.__indent_line()
	103	self.serialize(value)
	104
	105	self.depth -= 1
	106	self.stream.write(u"]")
99155935	107	else:
cba64103 BP	108	raise Exception("can't serialize %s as JSON" % obj)
	109
	110
	111	def to_stream(obj, stream, pretty=False, sort_keys=True):
	112	_Serializer(stream, pretty, sort_keys).serialize(obj)
99155935	113
26bb0f31	114
99155935 BP	115	def to_file(obj, name, pretty=False, sort_keys=True):
	116	stream = open(name, "w")
	117	try:
	118	to_stream(obj, stream, pretty, sort_keys)
	119	finally:
	120	stream.close()
	121
26bb0f31	122
99155935	123	def to_string(obj, pretty=False, sort_keys=True):
981e9560	124	output = six.StringIO()
99155935 BP	125	to_stream(obj, output, pretty, sort_keys)
	126	s = output.getvalue()
	127	output.close()
	128	return s
	129
26bb0f31	130
99155935 BP	131	def from_stream(stream):
	132	p = Parser(check_trailer=True)
	133	while True:
	134	buf = stream.read(4096)
	135	if buf == "" or p.feed(buf) != len(buf):
	136	break
	137	return p.finish()
	138
26bb0f31	139
99155935 BP	140	def from_file(name):
	141	stream = open(name, "r")
	142	try:
	143	return from_stream(stream)
	144	finally:
	145	stream.close()
	146
26bb0f31	147
99155935	148	def from_string(s):
25f599fb RB	149	if not isinstance(s, six.text_type):
	150	# We assume the input is a string. We will only hit this case for a
	151	# str in Python 2 which is not unicode, so we need to go ahead and
	152	# decode it.
	153	try:
	154	s = six.text_type(s, 'utf-8')
	155	except UnicodeDecodeError as e:
	156	seq = ' '.join(["0x%2x" % ord(c)
	157	for c in e.object[e.start:e.end] if ord(c) >= 0x80])
	158	return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
99155935 BP	159	p = Parser(check_trailer=True)
	160	p.feed(s)
	161	return p.finish()
	162
26bb0f31	163
99155935	164	class Parser(object):
a0631d92	165	# Maximum height of parsing stack. #
99155935 BP	166	MAX_HEIGHT = 1000
	167
	168	def __init__(self, check_trailer=False):
	169	self.check_trailer = check_trailer
	170
	171	# Lexical analysis.
	172	self.lex_state = Parser.__lex_start
	173	self.buffer = ""
	174	self.line_number = 0
	175	self.column_number = 0
	176	self.byte_number = 0
26bb0f31	177
99155935 BP	178	# Parsing.
	179	self.parse_state = Parser.__parse_start
	180	self.stack = []
	181	self.member_name = None
	182
	183	# Parse status.
	184	self.done = False
	185	self.error = None
	186
	187	def __lex_start_space(self, c):
	188	pass
26bb0f31	189
99155935 BP	190	def __lex_start_alpha(self, c):
	191	self.buffer = c
	192	self.lex_state = Parser.__lex_keyword
26bb0f31	193
99155935 BP	194	def __lex_start_token(self, c):
99155935 BP	195	self.__parser_input(c)
26bb0f31	196
99155935 BP	197	def __lex_start_number(self, c):
	198	self.buffer = c
	199	self.lex_state = Parser.__lex_number
26bb0f31	200
28c781df	201	def __lex_start_string(self, _):
99155935	202	self.lex_state = Parser.__lex_string
26bb0f31	203
99155935 BP	204	def __lex_start_error(self, c):
	205	if ord(c) >= 32 and ord(c) < 128:
	206	self.__error("invalid character '%s'" % c)
	207	else:
	208	self.__error("invalid character U+%04x" % ord(c))
	209
	210	__lex_start_actions = {}
	211	for c in " \t\n\r":
	212	__lex_start_actions[c] = __lex_start_space
	213	for c in "abcdefghijklmnopqrstuvwxyz":
	214	__lex_start_actions[c] = __lex_start_alpha
	215	for c in "[{]}:,":
	216	__lex_start_actions[c] = __lex_start_token
	217	for c in "-0123456789":
	218	__lex_start_actions[c] = __lex_start_number
	219	__lex_start_actions['"'] = __lex_start_string
26bb0f31	220
99155935 BP	221	def __lex_start(self, c):
	222	Parser.__lex_start_actions.get(
	223	c, Parser.__lex_start_error)(self, c)
	224	return True
	225
	226	__lex_alpha = {}
	227	for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
	228	__lex_alpha[c] = True
26bb0f31	229
99155935 BP	230	def __lex_finish_keyword(self):
	231	if self.buffer == "false":
	232	self.__parser_input(False)
	233	elif self.buffer == "true":
	234	self.__parser_input(True)
	235	elif self.buffer == "null":
	236	self.__parser_input(None)
	237	else:
	238	self.__error("invalid keyword '%s'" % self.buffer)
26bb0f31	239
99155935 BP	240	def __lex_keyword(self, c):
	241	if c in Parser.__lex_alpha:
	242	self.buffer += c
	243	return True
	244	else:
	245	self.__lex_finish_keyword()
	246	return False
	247
26bb0f31 EJ	248	__number_re = re.compile("(-)?(0\|[1-9][0-9]*)"
	249	"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
	250
99155935 BP	251	def __lex_finish_number(self):
	252	s = self.buffer
	253	m = Parser.__number_re.match(s)
	254	if m:
26bb0f31	255	sign, integer, fraction, exp = m.groups()
99155935	256	if (exp is not None and
8f808842	257	(int(exp) > sys.maxint or int(exp) < -sys.maxint - 1)):
99155935 BP	258	self.__error("exponent outside valid range")
	259	return
	260
	261	if fraction is not None and len(fraction.lstrip('0')) == 0:
	262	fraction = None
	263
	264	sig_string = integer
	265	if fraction is not None:
	266	sig_string += fraction
	267	significand = int(sig_string)
	268
	269	pow10 = 0
	270	if fraction is not None:
	271	pow10 -= len(fraction)
	272	if exp is not None:
8f808842	273	pow10 += int(exp)
99155935 BP	274
	275	if significand == 0:
	276	self.__parser_input(0)
	277	return
26bb0f31	278	elif significand <= 2 ** 63:
cd1b3f63	279	while pow10 > 0 and significand <= 2 ** 63:
99155935 BP	280	significand *= 10
	281	pow10 -= 1
	282	while pow10 < 0 and significand % 10 == 0:
	283	significand /= 10
	284	pow10 += 1
	285	if (pow10 == 0 and
26bb0f31 EJ	286	((not sign and significand < 2 ** 63) or
26bb0f31 EJ	287	(sign and significand <= 2 ** 63))):
99155935 BP	288	if sign:
	289	self.__parser_input(-significand)
	290	else:
	291	self.__parser_input(significand)
	292	return
	293
	294	value = float(s)
	295	if value == float("inf") or value == float("-inf"):
	296	self.__error("number outside valid range")
	297	return
	298	if value == 0:
	299	# Suppress negative zero.
	300	value = 0
	301	self.__parser_input(value)
	302	elif re.match("-?0[0-9]", s):
	303	self.__error("leading zeros not allowed")
	304	elif re.match("-([^0-9]\|$)", s):
	305	self.__error("'-' must be followed by digit")
	306	elif re.match("-?(0\|[1-9][0-9]*)\.([^0-9]\|$)", s):
	307	self.__error("decimal point must be followed by digit")
	308	elif re.search("e[-+]?([^0-9]\|$)", s):
	309	self.__error("exponent must contain at least one digit")
	310	else:
	311	self.__error("syntax error in number")
26bb0f31	312
99155935 BP	313	def __lex_number(self, c):
	314	if c in ".0123456789eE-+":
	315	self.buffer += c
	316	return True
	317	else:
	318	self.__lex_finish_number()
	319	return False
	320
	321	__4hex_re = re.compile("[0-9a-fA-F]{4}")
26bb0f31	322
99155935 BP	323	def __lex_4hex(self, s):
	324	if len(s) < 4:
	325	self.__error("quoted string ends within \\u escape")
	326	elif not Parser.__4hex_re.match(s):
	327	self.__error("malformed \\u escape")
	328	elif s == "0000":
	329	self.__error("null bytes not supported in quoted strings")
	330	else:
	331	return int(s, 16)
26bb0f31	332
99155935 BP	333	@staticmethod
	334	def __is_leading_surrogate(c):
	335	"""Returns true if 'c' is a Unicode code point for a leading
	336	surrogate."""
	337	return c >= 0xd800 and c <= 0xdbff
26bb0f31	338
99155935 BP	339	@staticmethod
	340	def __is_trailing_surrogate(c):
	341	"""Returns true if 'c' is a Unicode code point for a trailing
	342	surrogate."""
	343	return c >= 0xdc00 and c <= 0xdfff
26bb0f31	344
99155935 BP	345	@staticmethod
	346	def __utf16_decode_surrogate_pair(leading, trailing):
	347	"""Returns the unicode code point corresponding to leading surrogate
	348	'leading' and trailing surrogate 'trailing'. The return value will not
	349	make any sense if 'leading' or 'trailing' are not in the correct ranges
	350	for leading or trailing surrogates."""
	351	# Leading surrogate: 110110wwwwxxxxxx
	352	# Trailing surrogate: 110111xxxxxxxxxx
	353	# Code point: 000uuuuuxxxxxxxxxxxxxxxx
	354	w = (leading >> 6) & 0xf
	355	u = w + 1
	356	x0 = leading & 0x3f
	357	x1 = trailing & 0x3ff
	358	return (u << 16) \| (x0 << 10) \| x1
	359	__unescape = {'"': u'"',
	360	"\\": u"\\",
	361	"/": u"/",
	362	"b": u"\b",
	363	"f": u"\f",
	364	"n": u"\n",
	365	"r": u"\r",
	366	"t": u"\t"}
26bb0f31	367
99155935 BP	368	def __lex_finish_string(self):
	369	inp = self.buffer
	370	out = u""
	371	while len(inp):
	372	backslash = inp.find('\\')
	373	if backslash == -1:
	374	out += inp
	375	break
	376	out += inp[:backslash]
	377	inp = inp[backslash + 1:]
	378	if inp == "":
	379	self.__error("quoted string may not end with backslash")
	380	return
	381
	382	replacement = Parser.__unescape.get(inp[0])
	383	if replacement is not None:
	384	out += replacement
	385	inp = inp[1:]
	386	continue
	387	elif inp[0] != u'u':
	388	self.__error("bad escape \\%s" % inp[0])
	389	return
26bb0f31	390
99155935 BP	391	c0 = self.__lex_4hex(inp[1:5])
	392	if c0 is None:
	393	return
	394	inp = inp[5:]
	395
	396	if Parser.__is_leading_surrogate(c0):
	397	if inp[:2] != u'\\u':
	398	self.__error("malformed escaped surrogate pair")
	399	return
	400	c1 = self.__lex_4hex(inp[2:6])
	401	if c1 is None:
	402	return
	403	if not Parser.__is_trailing_surrogate(c1):
	404	self.__error("second half of escaped surrogate pair is "
	405	"not trailing surrogate")
	406	return
	407	code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
	408	inp = inp[6:]
	409	else:
	410	code_point = c0
	411	out += unichr(code_point)
	412	self.__parser_input('string', out)
	413
	414	def __lex_string_escape(self, c):
	415	self.buffer += c
	416	self.lex_state = Parser.__lex_string
	417	return True
26bb0f31	418
99155935 BP	419	def __lex_string(self, c):
	420	if c == '\\':
	421	self.buffer += c
	422	self.lex_state = Parser.__lex_string_escape
	423	elif c == '"':
	424	self.__lex_finish_string()
	425	elif ord(c) >= 0x20:
	426	self.buffer += c
	427	else:
	428	self.__error("U+%04X must be escaped in quoted string" % ord(c))
	429	return True
	430
	431	def __lex_input(self, c):
99155935 BP	432	eat = self.lex_state(self, c)
	433	assert eat is True or eat is False
	434	return eat
	435
28c781df	436	def __parse_start(self, token, unused_string):
99155935 BP	437	if token == '{':
	438	self.__push_object()
	439	elif token == '[':
	440	self.__push_array()
	441	else:
	442	self.__error("syntax error at beginning of input")
26bb0f31	443
    def __parse_end(self, unused_token, unused_string):
        """Parser state installed by __parser_pop once the outermost value
        has been closed: any further token is reported as an error."""
        self.__error("trailing garbage at end of input")
26bb0f31	446
99155935 BP	447	def __parse_object_init(self, token, string):
	448	if token == '}':
	449	self.__parser_pop()
	450	else:
	451	self.__parse_object_name(token, string)
26bb0f31	452
99155935 BP	453	def __parse_object_name(self, token, string):
	454	if token == 'string':
	455	self.member_name = string
	456	self.parse_state = Parser.__parse_object_colon
	457	else:
	458	self.__error("syntax error parsing object expecting string")
26bb0f31	459
28c781df	460	def __parse_object_colon(self, token, unused_string):
99155935 BP	461	if token == ":":
	462	self.parse_state = Parser.__parse_object_value
	463	else:
	464	self.__error("syntax error parsing object expecting ':'")
26bb0f31	465
    def __parse_object_value(self, token, string):
        """Parser state expecting the value of an object member.  Delegates
        to __parse_value, naming __parse_object_next as the state to enter
        after a scalar value."""
        self.__parse_value(token, string, Parser.__parse_object_next)
26bb0f31	468
28c781df	469	def __parse_object_next(self, token, unused_string):
99155935 BP	470	if token == ",":
	471	self.parse_state = Parser.__parse_object_name
	472	elif token == "}":
	473	self.__parser_pop()
	474	else:
	475	self.__error("syntax error expecting '}' or ','")
26bb0f31	476
99155935 BP	477	def __parse_array_init(self, token, string):
	478	if token == ']':
	479	self.__parser_pop()
	480	else:
	481	self.__parse_array_value(token, string)
26bb0f31	482
    def __parse_array_value(self, token, string):
        """Parser state expecting an array element.  Delegates to
        __parse_value, naming __parse_array_next as the state to enter after
        a scalar value."""
        self.__parse_value(token, string, Parser.__parse_array_next)
26bb0f31	485
28c781df	486	def __parse_array_next(self, token, unused_string):
99155935 BP	487	if token == ",":
	488	self.parse_state = Parser.__parse_array_value
	489	elif token == "]":
	490	self.__parser_pop()
	491	else:
	492	self.__error("syntax error expecting ']' or ','")
26bb0f31	493
99155935 BP	494	def __parser_input(self, token, string=None):
	495	self.lex_state = Parser.__lex_start
	496	self.buffer = ""
99155935	497	self.parse_state(self, token, string)
99155935 BP	498
	499	def __put_value(self, value):
	500	top = self.stack[-1]
	501	if type(top) == dict:
	502	top[self.member_name] = value
	503	else:
	504	top.append(value)
	505
	506	def __parser_push(self, new_json, next_state):
	507	if len(self.stack) < Parser.MAX_HEIGHT:
	508	if len(self.stack) > 0:
	509	self.__put_value(new_json)
	510	self.stack.append(new_json)
	511	self.parse_state = next_state
	512	else:
	513	self.__error("input exceeds maximum nesting depth %d" %
	514	Parser.MAX_HEIGHT)
26bb0f31	515
    def __push_object(self):
        """Begins a JSON object: hands a new empty dict to __parser_push,
        with __parse_object_init as the following parser state."""
        self.__parser_push({}, Parser.__parse_object_init)
26bb0f31	518
    def __push_array(self):
        """Begins a JSON array: hands a new empty list to __parser_push,
        with __parse_array_init as the following parser state."""
        self.__parser_push([], Parser.__parse_array_init)
	521
	522	def __parser_pop(self):
	523	if len(self.stack) == 1:
	524	self.parse_state = Parser.__parse_end
	525	if not self.check_trailer:
	526	self.done = True
	527	else:
	528	self.stack.pop()
	529	top = self.stack[-1]
	530	if type(top) == list:
	531	self.parse_state = Parser.__parse_array_next
	532	else:
	533	self.parse_state = Parser.__parse_object_next
	534
	535	def __parse_value(self, token, string, next_state):
8f808842 RB	536	number_types = list(six.integer_types)
	537	number_types.extend([float])
	538	number_types = tuple(number_types)
	539	if token in [False, None, True] or isinstance(token, number_types):
99155935 BP	540	self.__put_value(token)
	541	elif token == 'string':
	542	self.__put_value(string)
	543	else:
	544	if token == '{':
	545	self.__push_object()
	546	elif token == '[':
	547	self.__push_array()
	548	else:
	549	self.__error("syntax error expecting value")
	550	return
	551	self.parse_state = next_state
	552
	553	def __error(self, message):
	554	if self.error is None:
	555	self.error = ("line %d, column %d, byte %d: %s"
	556	% (self.line_number, self.column_number,
	557	self.byte_number, message))
	558	self.done = True
	559
	560	def feed(self, s):
	561	i = 0
	562	while True:
	563	if self.done or i >= len(s):
	564	return i
c640c04f BP	565
	566	c = s[i]
	567	if self.__lex_input(c):
	568	self.byte_number += 1
	569	if c == '\n':
	570	self.column_number = 0
	571	self.line_number += 1
	572	else:
	573	self.column_number += 1
	574
99155935 BP	575	i += 1
	576
    def is_done(self):
        """Returns the parser's 'done' flag.  It is set when an error is
        recorded and, if trailing input is not being checked, when the
        outermost value is closed."""
        return self.done
	579
	580	def finish(self):
	581	if self.lex_state == Parser.__lex_start:
	582	pass
	583	elif self.lex_state in (Parser.__lex_string,
	584	Parser.__lex_string_escape):
	585	self.__error("unexpected end of input in quoted string")
	586	else:
	587	self.__lex_input(" ")
	588
	589	if self.parse_state == Parser.__parse_start:
	590	self.__error("empty input stream")
	591	elif self.parse_state != Parser.__parse_end:
	592	self.__error("unexpected end of input")
	593
3c057118	594	if self.error is None:
99155935 BP	595	assert len(self.stack) == 1
	596	return self.stack.pop()
	597	else:
	598	return self.error