[ovs.git] / python / ovs / json.py

# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import functools
import json
import re
import sys

import six

try:
    import ovs._json
except ImportError:
    pass

__pychecker__ = 'no-stringiter'

# Number of spaces per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2

# json.dumps preconfigured with the most compact separators.
_dumper = functools.partial(json.dumps, separators=(",", ":"))


def _py2_dumper(*args, **kwargs):
    """Serializes like _dumper, then decodes the result with the
    'raw-unicode-escape' codec.  Only selected on Python 2."""
    return _dumper(*args, **kwargs).decode('raw-unicode-escape')


# Serializer used by to_stream() and to_string().
dumper = _py2_dumper if six.PY2 else _dumper


def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the resulting text to 'stream'.

    If 'pretty' is true the serializer is asked to indent by
    SPACES_PER_LEVEL spaces per nesting level; otherwise no indentation is
    requested.  'sort_keys' is passed through to the serializer."""
    if pretty:
        indent = SPACES_PER_LEVEL
    else:
        indent = None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    stream.write(text)


def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name', which is opened
    in mode "w".  'pretty' and 'sort_keys' are passed through to
    to_stream()."""
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)


def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as JSON text.

    If 'pretty' is true the serializer is asked to indent by
    SPACES_PER_LEVEL spaces per nesting level; otherwise no indentation is
    requested.  'sort_keys' is passed through to the serializer."""
    indent = None
    if pretty:
        indent = SPACES_PER_LEVEL
    return dumper(obj, indent=indent, sort_keys=sort_keys)


def from_stream(stream):
    """Parses JSON read from 'stream' in chunks of up to 4096 characters.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk.  Returns whatever Parser.finish() returns: the
    parsed value, or a string describing the error."""
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Test for emptiness by truth value rather than 'buf == ""': a
        # stream opened in binary mode returns b"" at end of input, which
        # never compares equal to "" on Python 3 and would make this loop
        # spin forever.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()


def from_file(name):
    """Parses the JSON text in the file named 'name', which is opened in
    mode "r" and closed again before returning.  Returns whatever
    from_stream() returns for the file's contents."""
    with open(name, "r") as stream:
        return from_stream(stream)


def from_string(s):
    """Parses the JSON text in 's'.

    's' may be a text string or a byte string; a byte string is decoded as
    UTF-8 first.  Returns the parsed value on success.  On failure, returns
    a string describing the error, either the invalid UTF-8 sequence or
    whatever Parser.finish() reports."""
    if not isinstance(s, six.text_type):
        # We assume the input is a byte string (a non-unicode str in
        # Python 2, or bytes in Python 3), so we need to go ahead and decode
        # it.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Iterate over a bytearray so that each element is an integer on
            # both Python 2 and Python 3.  Iterating the bytes object
            # directly yields integers on Python 3, where ord() would raise
            # TypeError instead of letting us report the error.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join(["0x%2x" % b for b in bad if b >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()


class Parser(object):
    """Incremental JSON parser built from two hand-written state machines.

    Characters passed to feed() drive a lexer; each completed token drives a
    parser that builds the result on an explicit stack.  'lex_state' and
    'parse_state' hold the handler for the next character and for the next
    token respectively.  finish() returns the parsed top-level value (a dict
    or a list) or, if anything went wrong, a string describing the first
    error.

    Tokens are handed to the parser as: the punctuation character itself for
    one of "[{]}:,"; False, True or None for keywords; an integer or float
    for numbers; and the literal 'string' (with the text as a second
    argument) for quoted strings.

    When the ovs._json module has been imported, Parser(...) constructs and
    returns ovs._json.Parser instead of an instance of this class."""

    # Maximum height of parsing stack.
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        # 'ovs' is bound only if the module-level "import ovs._json"
        # succeeded.  Otherwise the lookup raises NameError and this
        # pure-Python implementation is used.
        try:
            return ovs._json.Parser(*args, **kwargs)
        except NameError:
            return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Initializes the parser.

        If 'check_trailer' is false, parsing is complete (is_done() returns
        true) as soon as the top-level object or array is closed.  If it is
        true, the parser keeps accepting input after that point and reports
        any further token as an error."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        # Position counters, all starting from 0 and advanced by feed() for
        # every character the lexer consumes.
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer: handlers for the first character of a token.

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        code = ord(c)
        if 32 <= code < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % code)

    # Maps the first character of a token to its handler.  Built with
    # dict.fromkeys() rather than 'for' loops so that no loop variable is
    # left behind as a class attribute.
    __lex_start_actions = {}
    __lex_start_actions.update(dict.fromkeys(" \t\n\r", __lex_start_space))
    __lex_start_actions.update(
        dict.fromkeys("abcdefghijklmnopqrstuvwxyz", __lex_start_alpha))
    __lex_start_actions.update(dict.fromkeys("[{]}:,", __lex_start_token))
    __lex_start_actions.update(
        dict.fromkeys("-0123456789", __lex_start_number))
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Dispatches on 'c', the first character of a token.  Always
        consumes 'c'."""
        action = Parser.__lex_start_actions.get(c, Parser.__lex_start_error)
        action(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

    def __lex_finish_keyword(self):
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            # 'c' ends the keyword and is not part of it, so it is left for
            # the next lexer state to handle.
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent (with sign).
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number text accumulated in 'buffer' into a token.

        Numbers that the integer path below can reduce to an integer in the
        range [-2**63, 2**63) are passed to the parser as integers; every
        other valid number is passed as a float.  Malformed text, or a value
        that converts to an infinite float, is reported as an error."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction made up only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the number as significand * 10**pow10.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to bring pow10 to 0 without leaving the 64-bit range
                # and without discarding nonzero digits.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"[eE][-+]?([^0-9]|$)", s):
            # The number grammar accepts both 'e' and 'E', so diagnose a
            # missing exponent for either spelling.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            # 'c' ends the number and is left for the next lexer state.
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Parses 's' as the four hexadecimal digits of a backslash-u escape.

        Returns the value as an integer, or reports an error and returns
        None if 's' is too short, is not hexadecimal, or is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)
        return None

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the text each one stands for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Expands the escape sequences in 'buffer' (the raw text between
        the quotes) and passes the result to the parser as a 'string' token.
        Reports an error instead if an escape sequence is invalid."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            # Copy the text before the backslash, then look at the escape.
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            # 'u' followed by four hexadecimal digits.
            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by a
                # second escape that holds the trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += six.unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        # The character after a backslash is stored as-is, so that an
        # escaped quote does not end the string.
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state.  Returns True if 'c' was
        consumed, False if it must be presented to the lexer again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser: one handler per parse state.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer to its start state and passes 'token' (and, for
        a 'string' token, its text in 'string') to the current parse
        state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the container on top of the stack: under
        'member_name' for a dict, appended otherwise."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Starts the new container 'new_json', linking it into its parent
        (if any), and switches to 'next_state'.  Reports an error instead if
        the stack already holds MAX_HEIGHT containers."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Finishes the container on top of the stack."""
        if len(self.stack) == 1:
            # The top-level container stays on the stack for finish().
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles 'token' where a value is expected.  A scalar is stored
        and the parser moves to 'next_state'; '{' or '[' starts a nested
        container, which selects its own state; anything else is an
        error."""
        number_types = tuple(six.integer_types) + (float,)
        if token in [False, None, True] or isinstance(token, number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        elif token == '{':
            self.__push_object()
            return
        elif token == '[':
            self.__push_array()
            return
        else:
            self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current position, as the
        parse error and marks parsing as done.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser, stopping early once
        parsing is done.  Returns the number of characters consumed."""
        i = 0
        while not self.done and i < len(s):
            c = s[i]
            # A character that is not consumed ended the previous token; it
            # is presented again on the next iteration.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1
        return i

    def is_done(self):
        """Returns true once parsing has completed or failed."""
        return self.done

    def finish(self):
        """Signals end of input.  Returns the parsed value on success, or a
        string describing the first error otherwise."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a pending keyword or number by feeding a delimiter.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error
Commit	Line	Data
e0edde6f	1	# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
99155935 BP	2	#
	3	# Licensed under the Apache License, Version 2.0 (the "License");
	4	# you may not use this file except in compliance with the License.
	5	# You may obtain a copy of the License at:
	6	#
	7	# http://www.apache.org/licenses/LICENSE-2.0
	8	#
	9	# Unless required by applicable law or agreed to in writing, software
	10	# distributed under the License is distributed on an "AS IS" BASIS,
	11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	12	# See the License for the specific language governing permissions and
	13	# limitations under the License.
	14
622749d8	15	from __future__ import absolute_import
6c7050b5	16
622749d8 TW	17	import functools
622749d8 TW	18	import json
99155935	19	import re
99155935 BP	20	import sys
99155935 BP	21
cb96c1b2	22	import six
b3ac2947	23
c63b04d6 TW	24	try:
	25	import ovs._json
	26	except ImportError:
	27	pass
	28
26bb0f31 EJ	29	__pychecker__ = 'no-stringiter'
26bb0f31 EJ	30
cba64103	31	SPACES_PER_LEVEL = 2
e7164d96 LR	32	_dumper = functools.partial(json.dumps, separators=(",", ":"))
	33
	34	if six.PY2:
	35	def dumper(args, *kwargs):
	36	return _dumper(args, *kwargs).decode('raw-unicode-escape')
	37	else:
	38	dumper = _dumper
cba64103 BP	39
	40
	41	def to_stream(obj, stream, pretty=False, sort_keys=True):
622749d8 TW	42	stream.write(dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
622749d8 TW	43	sort_keys=sort_keys))
99155935	44
26bb0f31	45
99155935	46	def to_file(obj, name, pretty=False, sort_keys=True):
622749d8	47	with open(name, "w") as stream:
99155935	48	to_stream(obj, stream, pretty, sort_keys)
99155935	49
26bb0f31	50
99155935	51	def to_string(obj, pretty=False, sort_keys=True):
622749d8 TW	52	return dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
622749d8 TW	53	sort_keys=sort_keys)
99155935	54
26bb0f31	55
99155935 BP	56	def from_stream(stream):
	57	p = Parser(check_trailer=True)
	58	while True:
	59	buf = stream.read(4096)
	60	if buf == "" or p.feed(buf) != len(buf):
	61	break
	62	return p.finish()
	63
26bb0f31	64
99155935 BP	65	def from_file(name):
	66	stream = open(name, "r")
	67	try:
	68	return from_stream(stream)
	69	finally:
	70	stream.close()
	71
26bb0f31	72
99155935	73	def from_string(s):
25f599fb RB	74	if not isinstance(s, six.text_type):
	75	# We assume the input is a string. We will only hit this case for a
	76	# str in Python 2 which is not unicode, so we need to go ahead and
	77	# decode it.
	78	try:
	79	s = six.text_type(s, 'utf-8')
	80	except UnicodeDecodeError as e:
	81	seq = ' '.join(["0x%2x" % ord(c)
	82	for c in e.object[e.start:e.end] if ord(c) >= 0x80])
	83	return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
99155935 BP	84	p = Parser(check_trailer=True)
	85	p.feed(s)
	86	return p.finish()
	87
26bb0f31	88
99155935	89	class Parser(object):
a0631d92	90	# Maximum height of parsing stack. #
99155935 BP	91	MAX_HEIGHT = 1000
99155935 BP	92
c63b04d6 TW	93	def __new__(cls, args, *kwargs):
	94	try:
	95	return ovs._json.Parser(args, *kwargs)
	96	except NameError:
	97	return super(Parser, cls).__new__(cls)
	98
99155935 BP	99	def __init__(self, check_trailer=False):
	100	self.check_trailer = check_trailer
	101
	102	# Lexical analysis.
	103	self.lex_state = Parser.__lex_start
	104	self.buffer = ""
	105	self.line_number = 0
	106	self.column_number = 0
	107	self.byte_number = 0
26bb0f31	108
99155935 BP	109	# Parsing.
	110	self.parse_state = Parser.__parse_start
	111	self.stack = []
	112	self.member_name = None
	113
	114	# Parse status.
	115	self.done = False
	116	self.error = None
	117
	118	def __lex_start_space(self, c):
	119	pass
26bb0f31	120
99155935 BP	121	def __lex_start_alpha(self, c):
	122	self.buffer = c
	123	self.lex_state = Parser.__lex_keyword
26bb0f31	124
99155935 BP	125	def __lex_start_token(self, c):
99155935 BP	126	self.__parser_input(c)
26bb0f31	127
99155935 BP	128	def __lex_start_number(self, c):
	129	self.buffer = c
	130	self.lex_state = Parser.__lex_number
26bb0f31	131
28c781df	132	def __lex_start_string(self, _):
99155935	133	self.lex_state = Parser.__lex_string
26bb0f31	134
99155935 BP	135	def __lex_start_error(self, c):
	136	if ord(c) >= 32 and ord(c) < 128:
	137	self.__error("invalid character '%s'" % c)
	138	else:
	139	self.__error("invalid character U+%04x" % ord(c))
	140
	141	__lex_start_actions = {}
	142	for c in " \t\n\r":
	143	__lex_start_actions[c] = __lex_start_space
	144	for c in "abcdefghijklmnopqrstuvwxyz":
	145	__lex_start_actions[c] = __lex_start_alpha
	146	for c in "[{]}:,":
	147	__lex_start_actions[c] = __lex_start_token
	148	for c in "-0123456789":
	149	__lex_start_actions[c] = __lex_start_number
	150	__lex_start_actions['"'] = __lex_start_string
26bb0f31	151
99155935 BP	152	def __lex_start(self, c):
	153	Parser.__lex_start_actions.get(
	154	c, Parser.__lex_start_error)(self, c)
	155	return True
	156
	157	__lex_alpha = {}
	158	for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
	159	__lex_alpha[c] = True
26bb0f31	160
99155935 BP	161	def __lex_finish_keyword(self):
	162	if self.buffer == "false":
	163	self.__parser_input(False)
	164	elif self.buffer == "true":
	165	self.__parser_input(True)
	166	elif self.buffer == "null":
	167	self.__parser_input(None)
	168	else:
	169	self.__error("invalid keyword '%s'" % self.buffer)
26bb0f31	170
99155935 BP	171	def __lex_keyword(self, c):
	172	if c in Parser.__lex_alpha:
	173	self.buffer += c
	174	return True
	175	else:
	176	self.__lex_finish_keyword()
	177	return False
	178
26bb0f31 EJ	179	__number_re = re.compile("(-)?(0\|[1-9][0-9]*)"
	180	"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
	181
99155935 BP	182	def __lex_finish_number(self):
	183	s = self.buffer
	184	m = Parser.__number_re.match(s)
	185	if m:
26bb0f31	186	sign, integer, fraction, exp = m.groups()
99155935	187	if (exp is not None and
d36bbd37	188	(int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
99155935 BP	189	self.__error("exponent outside valid range")
	190	return
	191
	192	if fraction is not None and len(fraction.lstrip('0')) == 0:
	193	fraction = None
	194
	195	sig_string = integer
	196	if fraction is not None:
	197	sig_string += fraction
	198	significand = int(sig_string)
	199
	200	pow10 = 0
	201	if fraction is not None:
	202	pow10 -= len(fraction)
	203	if exp is not None:
8f808842	204	pow10 += int(exp)
99155935 BP	205
	206	if significand == 0:
	207	self.__parser_input(0)
	208	return
26bb0f31	209	elif significand <= 2 ** 63:
cd1b3f63	210	while pow10 > 0 and significand <= 2 ** 63:
99155935 BP	211	significand *= 10
	212	pow10 -= 1
	213	while pow10 < 0 and significand % 10 == 0:
2c362f17	214	significand //= 10
99155935 BP	215	pow10 += 1
99155935 BP	216	if (pow10 == 0 and
26bb0f31 EJ	217	((not sign and significand < 2 ** 63) or
26bb0f31 EJ	218	(sign and significand <= 2 ** 63))):
99155935 BP	219	if sign:
	220	self.__parser_input(-significand)
	221	else:
	222	self.__parser_input(significand)
	223	return
	224
	225	value = float(s)
	226	if value == float("inf") or value == float("-inf"):
	227	self.__error("number outside valid range")
	228	return
	229	if value == 0:
	230	# Suppress negative zero.
	231	value = 0
	232	self.__parser_input(value)
	233	elif re.match("-?0[0-9]", s):
	234	self.__error("leading zeros not allowed")
	235	elif re.match("-([^0-9]\|$)", s):
	236	self.__error("'-' must be followed by digit")
	237	elif re.match("-?(0\|[1-9][0-9]*)\.([^0-9]\|$)", s):
	238	self.__error("decimal point must be followed by digit")
	239	elif re.search("e[-+]?([^0-9]\|$)", s):
	240	self.__error("exponent must contain at least one digit")
	241	else:
	242	self.__error("syntax error in number")
26bb0f31	243
99155935 BP	244	def __lex_number(self, c):
	245	if c in ".0123456789eE-+":
	246	self.buffer += c
	247	return True
	248	else:
	249	self.__lex_finish_number()
	250	return False
	251
	252	__4hex_re = re.compile("[0-9a-fA-F]{4}")
26bb0f31	253
99155935 BP	254	def __lex_4hex(self, s):
	255	if len(s) < 4:
	256	self.__error("quoted string ends within \\u escape")
	257	elif not Parser.__4hex_re.match(s):
	258	self.__error("malformed \\u escape")
	259	elif s == "0000":
	260	self.__error("null bytes not supported in quoted strings")
	261	else:
	262	return int(s, 16)
26bb0f31	263
99155935 BP	264	@staticmethod
	265	def __is_leading_surrogate(c):
	266	"""Returns true if 'c' is a Unicode code point for a leading
	267	surrogate."""
	268	return c >= 0xd800 and c <= 0xdbff
26bb0f31	269
99155935 BP	270	@staticmethod
	271	def __is_trailing_surrogate(c):
	272	"""Returns true if 'c' is a Unicode code point for a trailing
	273	surrogate."""
	274	return c >= 0xdc00 and c <= 0xdfff
26bb0f31	275
99155935 BP	276	@staticmethod
	277	def __utf16_decode_surrogate_pair(leading, trailing):
	278	"""Returns the unicode code point corresponding to leading surrogate
	279	'leading' and trailing surrogate 'trailing'. The return value will not
	280	make any sense if 'leading' or 'trailing' are not in the correct ranges
	281	for leading or trailing surrogates."""
	282	# Leading surrogate: 110110wwwwxxxxxx
	283	# Trailing surrogate: 110111xxxxxxxxxx
	284	# Code point: 000uuuuuxxxxxxxxxxxxxxxx
	285	w = (leading >> 6) & 0xf
	286	u = w + 1
	287	x0 = leading & 0x3f
	288	x1 = trailing & 0x3ff
	289	return (u << 16) \| (x0 << 10) \| x1
	290	__unescape = {'"': u'"',
	291	"\\": u"\\",
	292	"/": u"/",
	293	"b": u"\b",
	294	"f": u"\f",
	295	"n": u"\n",
	296	"r": u"\r",
	297	"t": u"\t"}
26bb0f31	298
99155935 BP	299	def __lex_finish_string(self):
	300	inp = self.buffer
	301	out = u""
	302	while len(inp):
	303	backslash = inp.find('\\')
	304	if backslash == -1:
	305	out += inp
	306	break
	307	out += inp[:backslash]
	308	inp = inp[backslash + 1:]
	309	if inp == "":
	310	self.__error("quoted string may not end with backslash")
	311	return
	312
	313	replacement = Parser.__unescape.get(inp[0])
	314	if replacement is not None:
	315	out += replacement
	316	inp = inp[1:]
	317	continue
	318	elif inp[0] != u'u':
	319	self.__error("bad escape \\%s" % inp[0])
	320	return
26bb0f31	321
99155935 BP	322	c0 = self.__lex_4hex(inp[1:5])
	323	if c0 is None:
	324	return
	325	inp = inp[5:]
	326
	327	if Parser.__is_leading_surrogate(c0):
	328	if inp[:2] != u'\\u':
	329	self.__error("malformed escaped surrogate pair")
	330	return
	331	c1 = self.__lex_4hex(inp[2:6])
	332	if c1 is None:
	333	return
	334	if not Parser.__is_trailing_surrogate(c1):
	335	self.__error("second half of escaped surrogate pair is "
	336	"not trailing surrogate")
	337	return
	338	code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
	339	inp = inp[6:]
	340	else:
	341	code_point = c0
eac25f50	342	out += six.unichr(code_point)
99155935 BP	343	self.__parser_input('string', out)
	344
	345	def __lex_string_escape(self, c):
	346	self.buffer += c
	347	self.lex_state = Parser.__lex_string
	348	return True
26bb0f31	349
99155935 BP	350	def __lex_string(self, c):
	351	if c == '\\':
	352	self.buffer += c
	353	self.lex_state = Parser.__lex_string_escape
	354	elif c == '"':
	355	self.__lex_finish_string()
	356	elif ord(c) >= 0x20:
	357	self.buffer += c
	358	else:
	359	self.__error("U+%04X must be escaped in quoted string" % ord(c))
	360	return True
	361
	362	def __lex_input(self, c):
99155935 BP	363	eat = self.lex_state(self, c)
	364	assert eat is True or eat is False
	365	return eat
	366
28c781df	367	def __parse_start(self, token, unused_string):
99155935 BP	368	if token == '{':
	369	self.__push_object()
	370	elif token == '[':
	371	self.__push_array()
	372	else:
	373	self.__error("syntax error at beginning of input")
26bb0f31	374
28c781df	375	def __parse_end(self, unused_token, unused_string):
99155935	376	self.__error("trailing garbage at end of input")
26bb0f31	377
99155935 BP	378	def __parse_object_init(self, token, string):
	379	if token == '}':
	380	self.__parser_pop()
	381	else:
	382	self.__parse_object_name(token, string)
26bb0f31	383
99155935 BP	384	def __parse_object_name(self, token, string):
	385	if token == 'string':
	386	self.member_name = string
	387	self.parse_state = Parser.__parse_object_colon
	388	else:
	389	self.__error("syntax error parsing object expecting string")
26bb0f31	390
28c781df	391	def __parse_object_colon(self, token, unused_string):
99155935 BP	392	if token == ":":
	393	self.parse_state = Parser.__parse_object_value
	394	else:
	395	self.__error("syntax error parsing object expecting ':'")
26bb0f31	396
99155935 BP	397	def __parse_object_value(self, token, string):
99155935 BP	398	self.__parse_value(token, string, Parser.__parse_object_next)
26bb0f31	399
28c781df	400	def __parse_object_next(self, token, unused_string):
99155935 BP	401	if token == ",":
	402	self.parse_state = Parser.__parse_object_name
	403	elif token == "}":
	404	self.__parser_pop()
	405	else:
	406	self.__error("syntax error expecting '}' or ','")
26bb0f31	407
99155935 BP	408	def __parse_array_init(self, token, string):
	409	if token == ']':
	410	self.__parser_pop()
	411	else:
	412	self.__parse_array_value(token, string)
26bb0f31	413
99155935 BP	414	def __parse_array_value(self, token, string):
99155935 BP	415	self.__parse_value(token, string, Parser.__parse_array_next)
26bb0f31	416
28c781df	417	def __parse_array_next(self, token, unused_string):
99155935 BP	418	if token == ",":
	419	self.parse_state = Parser.__parse_array_value
	420	elif token == "]":
	421	self.__parser_pop()
	422	else:
	423	self.__error("syntax error expecting ']' or ','")
26bb0f31	424
99155935 BP	425	def __parser_input(self, token, string=None):
	426	self.lex_state = Parser.__lex_start
	427	self.buffer = ""
99155935	428	self.parse_state(self, token, string)
99155935 BP	429
	430	def __put_value(self, value):
	431	top = self.stack[-1]
da2d45c6	432	if isinstance(top, dict):
99155935 BP	433	top[self.member_name] = value
	434	else:
	435	top.append(value)
	436
	437	def __parser_push(self, new_json, next_state):
	438	if len(self.stack) < Parser.MAX_HEIGHT:
	439	if len(self.stack) > 0:
	440	self.__put_value(new_json)
	441	self.stack.append(new_json)
	442	self.parse_state = next_state
	443	else:
	444	self.__error("input exceeds maximum nesting depth %d" %
	445	Parser.MAX_HEIGHT)
26bb0f31	446
99155935 BP	447	def __push_object(self):
99155935 BP	448	self.__parser_push({}, Parser.__parse_object_init)
26bb0f31	449
99155935 BP	450	def __push_array(self):
	451	self.__parser_push([], Parser.__parse_array_init)
	452
	453	def __parser_pop(self):
	454	if len(self.stack) == 1:
	455	self.parse_state = Parser.__parse_end
	456	if not self.check_trailer:
	457	self.done = True
	458	else:
	459	self.stack.pop()
	460	top = self.stack[-1]
da2d45c6	461	if isinstance(top, list):
99155935 BP	462	self.parse_state = Parser.__parse_array_next
	463	else:
	464	self.parse_state = Parser.__parse_object_next
	465
	466	def __parse_value(self, token, string, next_state):
8f808842 RB	467	number_types = list(six.integer_types)
	468	number_types.extend([float])
	469	number_types = tuple(number_types)
	470	if token in [False, None, True] or isinstance(token, number_types):
99155935 BP	471	self.__put_value(token)
	472	elif token == 'string':
	473	self.__put_value(string)
	474	else:
	475	if token == '{':
	476	self.__push_object()
	477	elif token == '[':
	478	self.__push_array()
	479	else:
	480	self.__error("syntax error expecting value")
	481	return
	482	self.parse_state = next_state
	483
	484	def __error(self, message):
	485	if self.error is None:
	486	self.error = ("line %d, column %d, byte %d: %s"
	487	% (self.line_number, self.column_number,
	488	self.byte_number, message))
	489	self.done = True
	490
	491	def feed(self, s):
	492	i = 0
	493	while True:
	494	if self.done or i >= len(s):
	495	return i
c640c04f BP	496
	497	c = s[i]
	498	if self.__lex_input(c):
	499	self.byte_number += 1
	500	if c == '\n':
	501	self.column_number = 0
	502	self.line_number += 1
	503	else:
	504	self.column_number += 1
	505
99155935 BP	506	i += 1
	507
	508	def is_done(self):
	509	return self.done
	510
	511	def finish(self):
	512	if self.lex_state == Parser.__lex_start:
	513	pass
	514	elif self.lex_state in (Parser.__lex_string,
	515	Parser.__lex_string_escape):
	516	self.__error("unexpected end of input in quoted string")
	517	else:
	518	self.__lex_input(" ")
	519
	520	if self.parse_state == Parser.__parse_start:
	521	self.__error("empty input stream")
	522	elif self.parse_state != Parser.__parse_end:
	523	self.__error("unexpected end of input")
	524
3c057118	525	if self.error is None:
99155935 BP	526	assert len(self.stack) == 1
	527	return self.stack.pop()
	528	else:
	529	return self.error