# [mirror_ovs.git] / python / ovs / json.py

# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import functools
import json
import re
import sys

# Identifiers for the two available parser implementations.  PARSER records
# which one is in use: the C extension when 'ovs._json' can be imported,
# otherwise the pure-Python Parser class defined in this module.
PARSER_C = 'C'
PARSER_PY = 'PYTHON'
try:
    import ovs._json
    PARSER = PARSER_C
except ImportError:
    PARSER = PARSER_PY

# NOTE(review): presumably disables pychecker's string-iteration warning for
# this module, which iterates over strings character by character -- confirm
# against the pychecker documentation.
__pychecker__ = 'no-stringiter'

# Indentation width used when pretty-printing.
SPACES_PER_LEVEL = 2
# json.dumps preconfigured with compact separators (no space after ',' or
# ':'); callers supply 'indent' and 'sort_keys' per call.
dumper = functools.partial(json.dumps, separators=(",", ":"))


def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON text and writes it to 'stream'.

    'stream' must provide a write() method accepting a string.  If 'pretty'
    is true the output is indented by SPACES_PER_LEVEL spaces per nesting
    level; otherwise it is emitted on a single line.  'sort_keys' is passed
    through to the JSON encoder."""
    write = stream.write
    indent = SPACES_PER_LEVEL if pretty else None
    text = dumper(obj, indent=indent, sort_keys=sort_keys)
    write(text)


def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON text into the file named 'name'.

    The file is opened for writing in text mode (replacing any existing
    content) and closed again before returning.  'pretty' and 'sort_keys'
    have the same meaning as for to_stream()."""
    with open(name, "w") as out:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)


def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    If 'pretty' is true the result is indented by SPACES_PER_LEVEL spaces per
    nesting level; otherwise no indentation is requested.  'sort_keys' is
    passed through to the JSON encoder."""
    if pretty:
        indent = SPACES_PER_LEVEL
    else:
        indent = None
    return dumper(obj, indent=indent, sort_keys=sort_keys)


def from_stream(stream):
    """Parses the JSON text read from 'stream' and returns the result.

    'stream' must provide a read(size) method.  Input is consumed in 4096
    character chunks until the stream is exhausted or the parser stops
    accepting input (which it does as soon as it has recorded an error).

    Returns whatever Parser.finish() returns: the parsed value on success,
    or an error message string on failure."""
    p = Parser(check_trailer=True)
    while True:
        buf = stream.read(4096)
        # Test for "nothing read" with truthiness rather than 'buf == ""':
        # an end-of-stream marker that is empty but not equal to "" (such as
        # b"") would otherwise never end this loop, because feeding an empty
        # chunk consumes 0 == len(buf) characters.
        if not buf or p.feed(buf) != len(buf):
            break
    return p.finish()


def from_file(name):
    """Parses the JSON text in the file named 'name' and returns the result.

    The file is opened for reading in text mode and is closed again before
    returning, whether or not parsing raised.  The return value is that of
    from_stream()."""
    with open(name, "r") as stream:
        return from_stream(stream)


def from_string(s):
    """Parses the JSON text 's' and returns the result.

    's' is normally a str.  Anything else is first decoded as UTF-8 via
    str(s, 'utf-8'); if that decoding fails, a message naming the offending
    bytes is returned instead of a parse result.

    Returns whatever Parser.finish() returns: the parsed value on success,
    or an error message string on failure."""
    if not isinstance(s, str):
        # Not a text string (for example, bytes): decode it before parsing.
        try:
            s = str(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Go through bytearray so that iteration yields integers
            # regardless of the exact bytes-like type of e.object; calling
            # ord() on the items of a bytes object raises TypeError.
            bad = bytearray(e.object[e.start:e.end])
            seq = ' '.join("0x%2x" % b for b in bad if b >= 0x80)
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()


class Parser(object):
    """Incremental JSON parser that consumes its input one character at a
    time.

    Text is supplied through one or more calls to feed(); finish() then
    returns either the parsed top-level object or array, or an error message
    string if the input was not acceptable.

    The implementation is a pair of state machines.  The lexer's current
    state is the function stored in 'lex_state'; it turns characters into
    tokens.  Each token is passed to the function stored in 'parse_state',
    which builds the result on 'stack'.

    When the module-level PARSER equals PARSER_C, constructing a Parser
    returns an ovs._json.Parser object instead of an instance of this
    class."""

    # Maximum height of parsing stack. #
    MAX_HEIGHT = 1000

    def __new__(cls, *args, **kwargs):
        """Returns the C parser if it was selected at import time, otherwise
        a new, uninitialized instance of this class."""
        if PARSER == PARSER_C:
            return ovs._json.Parser(*args, **kwargs)
        return super(Parser, cls).__new__(cls)

    def __init__(self, check_trailer=False):
        """Initializes an empty parser.

        If 'check_trailer' is false, the parser marks itself done as soon as
        the top-level value is closed.  If it is true, the parser keeps
        accepting input after that point so that any further token can be
        reported as trailing garbage."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        # Counts consumed characters of the input passed to feed().
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Actions for the first character of a token.  Each is looked up in
    # __lex_start_actions by __lex_start().

    def __lex_start_space(self, c):
        """Ignores whitespace between tokens."""
        pass

    def __lex_start_alpha(self, c):
        """Begins accumulating a keyword."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Passes a single-character punctuation token to the parser."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begins accumulating a number."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """Begins a quoted string; the opening quote is not buffered."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Reports a character that cannot begin any token."""
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Maps the first character of a token to the action that handles it.
    # Built with dict.fromkeys() so that no loop variable is left behind as
    # a class attribute.
    __lex_start_actions = {}
    __lex_start_actions.update(dict.fromkeys(" \t\n\r", __lex_start_space))
    __lex_start_actions.update(
        dict.fromkeys("abcdefghijklmnopqrstuvwxyz", __lex_start_alpha))
    __lex_start_actions.update(dict.fromkeys("[{]}:,", __lex_start_token))
    __lex_start_actions.update(dict.fromkeys("-0123456789",
                                             __lex_start_number))
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens: dispatches on 'c'.  Always consumes
        the character."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

    def __lex_finish_keyword(self):
        """Converts the buffered keyword into a token, or reports an error if
        it is not one of false, true, or null."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
            r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered number into a token.

        A value that is integral and lies in the range -2**63...2**63 - 1 is
        passed to the parser as an int, anything else as a float.  Malformed
        numbers and floats that overflow to infinity are reported as
        errors."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros does not affect the value.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the number as significand * 10**pow10.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to reduce pow10 to zero without leaving the 64-bit
                # range; if that works the value is an integer.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"[eE][-+]?([^0-9]|$)", s):
            # Both 'e' and 'E' introduce an exponent (see __number_re), so
            # both must be recognized here.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the integer value of the four hexadecimal digits in 's'.

        Reports an error and returns None if 's' is shorter than four
        characters, is not hexadecimal, or is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the characters they stand for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Resolves the escape sequences in the buffered string body and
        passes the result to the parser as a 'string' token.

        Reports an error, and produces no token, on a bad escape."""
        inp = self.buffer
        end = len(inp)
        # Collect the pieces and join them once, and walk the buffer by
        # index, so that the cost stays linear in the length of the string
        # however many escapes it contains.
        pieces = []
        pos = 0
        while pos < end:
            backslash = inp.find('\\', pos)
            if backslash == -1:
                pieces.append(inp[pos:])
                break
            pieces.append(inp[pos:backslash])
            pos = backslash + 1
            if pos >= end:
                self.__error("quoted string may not end with backslash")
                return

            escape = inp[pos]
            replacement = Parser.__unescape.get(escape)
            if replacement is not None:
                pieces.append(replacement)
                pos += 1
                continue
            elif escape != u'u':
                self.__error("bad escape \\%s" % escape)
                return

            # 'u' followed by four hexadecimal digits.
            c0 = self.__lex_4hex(inp[pos + 1:pos + 5])
            if c0 is None:
                return
            pos += 5

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[pos:pos + 2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[pos + 2:pos + 6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                pos += 6
            else:
                code_point = c0
            pieces.append(chr(code_point))
        self.__parser_input('string', u"".join(pieces))

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash inside a string: buffers 'c'
        unconditionally, so that an escaped quote does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state.  Returns True if the
        character was consumed, False if it must be passed in again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes a token and, for 'string' tokens, the
    # string's value.

    def __parse_start(self, token, unused_string):
        """Parser state before any token: only '{' or '[' is acceptable."""
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        """Parser state after the top-level value: any token is an error."""
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        """Parser state just after '{': expects '}' or a member name."""
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        """Parser state expecting a member name."""
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        """Parser state expecting the ':' after a member name."""
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        """Parser state expecting a member value."""
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        """Parser state after a member value: expects ',' or '}'."""
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        """Parser state just after '[': expects ']' or an element."""
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        """Parser state expecting an array element."""
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        """Parser state after an array element: expects ',' or ']'."""
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and passes 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack: under
        'member_name' for an object, appended for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Makes the empty container 'new_json' the innermost open value,
        attaching it to its parent if there is one, and enters 'next_state'.
        Reports an error if MAX_HEIGHT containers are already open."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        """Opens a new object."""
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        """Opens a new array."""
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Closes the innermost open container.

        The top-level container stays on the stack so that finish() can
        return it."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.

        A scalar is stored in the enclosing container and the parser moves
        to 'next_state'.  '{' or '[' opens a nested container, which sets
        the parser state itself.  Any other token is an error."""
        if token in [False, None, True] or isinstance(token, (int, float)):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, and
        marks the parser done.  Only the first error is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.

        Returns the number of characters consumed.  That is less than
        len(s) if the parser became done, either through an error or by
        completing the top-level value when 'check_trailer' is false."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A lexer state that returns False has finished a token without
            # consuming 'c', so the same character is passed in again.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input."""
        return self.done

    def finish(self):
        """Signals end of input and returns the outcome.

        Returns the parsed top-level object or array on success, or an error
        message string if the input was empty, incomplete, or invalid."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A keyword or number is still pending; a space terminates it.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error
Commit	Line	Data
e0edde6f	1	# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
99155935 BP	2	#
	3	# Licensed under the Apache License, Version 2.0 (the "License");
	4	# you may not use this file except in compliance with the License.
	5	# You may obtain a copy of the License at:
	6	#
	7	# http://www.apache.org/licenses/LICENSE-2.0
	8	#
	9	# Unless required by applicable law or agreed to in writing, software
	10	# distributed under the License is distributed on an "AS IS" BASIS,
	11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	12	# See the License for the specific language governing permissions and
	13	# limitations under the License.
	14
622749d8	15	from __future__ import absolute_import
6c7050b5	16
622749d8 TW	17	import functools
622749d8 TW	18	import json
99155935	19	import re
99155935 BP	20	import sys
99155935 BP	21
75ff7116 TW	22	PARSER_C = 'C'
75ff7116 TW	23	PARSER_PY = 'PYTHON'
c63b04d6 TW	24	try:
c63b04d6 TW	25	import ovs._json
75ff7116	26	PARSER = PARSER_C
c63b04d6	27	except ImportError:
75ff7116	28	PARSER = PARSER_PY
c63b04d6	29
26bb0f31 EJ	30	__pychecker__ = 'no-stringiter'
26bb0f31 EJ	31
cba64103	32	SPACES_PER_LEVEL = 2
0c4d144a	33	dumper = functools.partial(json.dumps, separators=(",", ":"))
cba64103 BP	34
	35
	36	def to_stream(obj, stream, pretty=False, sort_keys=True):
622749d8 TW	37	stream.write(dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
622749d8 TW	38	sort_keys=sort_keys))
99155935	39
26bb0f31	40
99155935	41	def to_file(obj, name, pretty=False, sort_keys=True):
622749d8	42	with open(name, "w") as stream:
99155935	43	to_stream(obj, stream, pretty, sort_keys)
99155935	44
26bb0f31	45
99155935	46	def to_string(obj, pretty=False, sort_keys=True):
622749d8 TW	47	return dumper(obj, indent=SPACES_PER_LEVEL if pretty else None,
622749d8 TW	48	sort_keys=sort_keys)
99155935	49
26bb0f31	50
99155935 BP	51	def from_stream(stream):
	52	p = Parser(check_trailer=True)
	53	while True:
	54	buf = stream.read(4096)
	55	if buf == "" or p.feed(buf) != len(buf):
	56	break
	57	return p.finish()
	58
26bb0f31	59
99155935 BP	60	def from_file(name):
	61	stream = open(name, "r")
	62	try:
	63	return from_stream(stream)
	64	finally:
	65	stream.close()
	66
26bb0f31	67
99155935	68	def from_string(s):
0c4d144a	69	if not isinstance(s, str):
25f599fb RB	70	# We assume the input is a string. We will only hit this case for a
	71	# str in Python 2 which is not unicode, so we need to go ahead and
	72	# decode it.
	73	try:
0c4d144a	74	s = str(s, 'utf-8')
25f599fb RB	75	except UnicodeDecodeError as e:
	76	seq = ' '.join(["0x%2x" % ord(c)
	77	for c in e.object[e.start:e.end] if ord(c) >= 0x80])
	78	return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
99155935 BP	79	p = Parser(check_trailer=True)
	80	p.feed(s)
	81	return p.finish()
	82
26bb0f31	83
99155935	84	class Parser(object):
a0631d92	85	# Maximum height of parsing stack. #
99155935 BP	86	MAX_HEIGHT = 1000
99155935 BP	87
c63b04d6	88	def __new__(cls, args, *kwargs):
75ff7116	89	if PARSER == PARSER_C:
c63b04d6	90	return ovs._json.Parser(args, *kwargs)
75ff7116	91	return super(Parser, cls).__new__(cls)
c63b04d6	92
99155935 BP	93	def __init__(self, check_trailer=False):
	94	self.check_trailer = check_trailer
	95
	96	# Lexical analysis.
	97	self.lex_state = Parser.__lex_start
	98	self.buffer = ""
	99	self.line_number = 0
	100	self.column_number = 0
	101	self.byte_number = 0
26bb0f31	102
99155935 BP	103	# Parsing.
	104	self.parse_state = Parser.__parse_start
	105	self.stack = []
	106	self.member_name = None
	107
	108	# Parse status.
	109	self.done = False
	110	self.error = None
	111
	112	def __lex_start_space(self, c):
	113	pass
26bb0f31	114
99155935 BP	115	def __lex_start_alpha(self, c):
	116	self.buffer = c
	117	self.lex_state = Parser.__lex_keyword
26bb0f31	118
99155935 BP	119	def __lex_start_token(self, c):
99155935 BP	120	self.__parser_input(c)
26bb0f31	121
99155935 BP	122	def __lex_start_number(self, c):
	123	self.buffer = c
	124	self.lex_state = Parser.__lex_number
26bb0f31	125
28c781df	126	def __lex_start_string(self, _):
99155935	127	self.lex_state = Parser.__lex_string
26bb0f31	128
99155935 BP	129	def __lex_start_error(self, c):
	130	if ord(c) >= 32 and ord(c) < 128:
	131	self.__error("invalid character '%s'" % c)
	132	else:
	133	self.__error("invalid character U+%04x" % ord(c))
	134
	135	__lex_start_actions = {}
	136	for c in " \t\n\r":
	137	__lex_start_actions[c] = __lex_start_space
	138	for c in "abcdefghijklmnopqrstuvwxyz":
	139	__lex_start_actions[c] = __lex_start_alpha
	140	for c in "[{]}:,":
	141	__lex_start_actions[c] = __lex_start_token
	142	for c in "-0123456789":
	143	__lex_start_actions[c] = __lex_start_number
	144	__lex_start_actions['"'] = __lex_start_string
26bb0f31	145
99155935 BP	146	def __lex_start(self, c):
	147	Parser.__lex_start_actions.get(
	148	c, Parser.__lex_start_error)(self, c)
	149	return True
	150
	151	__lex_alpha = {}
	152	for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
	153	__lex_alpha[c] = True
26bb0f31	154
99155935 BP	155	def __lex_finish_keyword(self):
	156	if self.buffer == "false":
	157	self.__parser_input(False)
	158	elif self.buffer == "true":
	159	self.__parser_input(True)
	160	elif self.buffer == "null":
	161	self.__parser_input(None)
	162	else:
	163	self.__error("invalid keyword '%s'" % self.buffer)
26bb0f31	164
99155935 BP	165	def __lex_keyword(self, c):
	166	if c in Parser.__lex_alpha:
	167	self.buffer += c
	168	return True
	169	else:
	170	self.__lex_finish_keyword()
	171	return False
	172
26bb0f31	173	__number_re = re.compile("(-)?(0\|[1-9][0-9]*)"
145a7e88	174	r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
26bb0f31	175
99155935 BP	176	def __lex_finish_number(self):
	177	s = self.buffer
	178	m = Parser.__number_re.match(s)
	179	if m:
26bb0f31	180	sign, integer, fraction, exp = m.groups()
99155935	181	if (exp is not None and
d36bbd37	182	(int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
99155935 BP	183	self.__error("exponent outside valid range")
	184	return
	185
	186	if fraction is not None and len(fraction.lstrip('0')) == 0:
	187	fraction = None
	188
	189	sig_string = integer
	190	if fraction is not None:
	191	sig_string += fraction
	192	significand = int(sig_string)
	193
	194	pow10 = 0
	195	if fraction is not None:
	196	pow10 -= len(fraction)
	197	if exp is not None:
8f808842	198	pow10 += int(exp)
99155935 BP	199
	200	if significand == 0:
	201	self.__parser_input(0)
	202	return
26bb0f31	203	elif significand <= 2 ** 63:
cd1b3f63	204	while pow10 > 0 and significand <= 2 ** 63:
99155935 BP	205	significand *= 10
	206	pow10 -= 1
	207	while pow10 < 0 and significand % 10 == 0:
2c362f17	208	significand //= 10
99155935 BP	209	pow10 += 1
99155935 BP	210	if (pow10 == 0 and
26bb0f31 EJ	211	((not sign and significand < 2 ** 63) or
26bb0f31 EJ	212	(sign and significand <= 2 ** 63))):
99155935 BP	213	if sign:
	214	self.__parser_input(-significand)
	215	else:
	216	self.__parser_input(significand)
	217	return
	218
	219	value = float(s)
	220	if value == float("inf") or value == float("-inf"):
	221	self.__error("number outside valid range")
	222	return
	223	if value == 0:
	224	# Suppress negative zero.
	225	value = 0
	226	self.__parser_input(value)
	227	elif re.match("-?0[0-9]", s):
	228	self.__error("leading zeros not allowed")
	229	elif re.match("-([^0-9]\|$)", s):
	230	self.__error("'-' must be followed by digit")
145a7e88	231	elif re.match(r"-?(0\|[1-9][0-9]*)\.([^0-9]\|$)", s):
99155935 BP	232	self.__error("decimal point must be followed by digit")
	233	elif re.search("e[-+]?([^0-9]\|$)", s):
	234	self.__error("exponent must contain at least one digit")
	235	else:
	236	self.__error("syntax error in number")
26bb0f31	237
99155935 BP	238	def __lex_number(self, c):
	239	if c in ".0123456789eE-+":
	240	self.buffer += c
	241	return True
	242	else:
	243	self.__lex_finish_number()
	244	return False
	245
	246	__4hex_re = re.compile("[0-9a-fA-F]{4}")
26bb0f31	247
99155935 BP	248	def __lex_4hex(self, s):
	249	if len(s) < 4:
	250	self.__error("quoted string ends within \\u escape")
	251	elif not Parser.__4hex_re.match(s):
	252	self.__error("malformed \\u escape")
	253	elif s == "0000":
	254	self.__error("null bytes not supported in quoted strings")
	255	else:
	256	return int(s, 16)
26bb0f31	257
99155935 BP	258	@staticmethod
	259	def __is_leading_surrogate(c):
	260	"""Returns true if 'c' is a Unicode code point for a leading
	261	surrogate."""
	262	return c >= 0xd800 and c <= 0xdbff
26bb0f31	263
99155935 BP	264	@staticmethod
	265	def __is_trailing_surrogate(c):
	266	"""Returns true if 'c' is a Unicode code point for a trailing
	267	surrogate."""
	268	return c >= 0xdc00 and c <= 0xdfff
26bb0f31	269
99155935 BP	270	@staticmethod
	271	def __utf16_decode_surrogate_pair(leading, trailing):
	272	"""Returns the unicode code point corresponding to leading surrogate
	273	'leading' and trailing surrogate 'trailing'. The return value will not
	274	make any sense if 'leading' or 'trailing' are not in the correct ranges
	275	for leading or trailing surrogates."""
	276	# Leading surrogate: 110110wwwwxxxxxx
	277	# Trailing surrogate: 110111xxxxxxxxxx
	278	# Code point: 000uuuuuxxxxxxxxxxxxxxxx
	279	w = (leading >> 6) & 0xf
	280	u = w + 1
	281	x0 = leading & 0x3f
	282	x1 = trailing & 0x3ff
	283	return (u << 16) \| (x0 << 10) \| x1
	284	__unescape = {'"': u'"',
	285	"\\": u"\\",
	286	"/": u"/",
	287	"b": u"\b",
	288	"f": u"\f",
	289	"n": u"\n",
	290	"r": u"\r",
	291	"t": u"\t"}
26bb0f31	292
99155935 BP	293	def __lex_finish_string(self):
	294	inp = self.buffer
	295	out = u""
	296	while len(inp):
	297	backslash = inp.find('\\')
	298	if backslash == -1:
	299	out += inp
	300	break
	301	out += inp[:backslash]
	302	inp = inp[backslash + 1:]
	303	if inp == "":
	304	self.__error("quoted string may not end with backslash")
	305	return
	306
	307	replacement = Parser.__unescape.get(inp[0])
	308	if replacement is not None:
	309	out += replacement
	310	inp = inp[1:]
	311	continue
	312	elif inp[0] != u'u':
	313	self.__error("bad escape \\%s" % inp[0])
	314	return
26bb0f31	315
99155935 BP	316	c0 = self.__lex_4hex(inp[1:5])
	317	if c0 is None:
	318	return
	319	inp = inp[5:]
	320
	321	if Parser.__is_leading_surrogate(c0):
	322	if inp[:2] != u'\\u':
	323	self.__error("malformed escaped surrogate pair")
	324	return
	325	c1 = self.__lex_4hex(inp[2:6])
	326	if c1 is None:
	327	return
	328	if not Parser.__is_trailing_surrogate(c1):
	329	self.__error("second half of escaped surrogate pair is "
	330	"not trailing surrogate")
	331	return
	332	code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
	333	inp = inp[6:]
	334	else:
	335	code_point = c0
0c4d144a	336	out += chr(code_point)
99155935 BP	337	self.__parser_input('string', out)
	338
	339	def __lex_string_escape(self, c):
	340	self.buffer += c
	341	self.lex_state = Parser.__lex_string
	342	return True
26bb0f31	343
99155935 BP	344	def __lex_string(self, c):
	345	if c == '\\':
	346	self.buffer += c
	347	self.lex_state = Parser.__lex_string_escape
	348	elif c == '"':
	349	self.__lex_finish_string()
	350	elif ord(c) >= 0x20:
	351	self.buffer += c
	352	else:
	353	self.__error("U+%04X must be escaped in quoted string" % ord(c))
	354	return True
	355
	356	def __lex_input(self, c):
99155935 BP	357	eat = self.lex_state(self, c)
	358	assert eat is True or eat is False
	359	return eat
	360
28c781df	361	def __parse_start(self, token, unused_string):
99155935 BP	362	if token == '{':
	363	self.__push_object()
	364	elif token == '[':
	365	self.__push_array()
	366	else:
	367	self.__error("syntax error at beginning of input")
26bb0f31	368
28c781df	369	def __parse_end(self, unused_token, unused_string):
99155935	370	self.__error("trailing garbage at end of input")
26bb0f31	371
99155935 BP	372	def __parse_object_init(self, token, string):
	373	if token == '}':
	374	self.__parser_pop()
	375	else:
	376	self.__parse_object_name(token, string)
26bb0f31	377
99155935 BP	378	def __parse_object_name(self, token, string):
	379	if token == 'string':
	380	self.member_name = string
	381	self.parse_state = Parser.__parse_object_colon
	382	else:
	383	self.__error("syntax error parsing object expecting string")
26bb0f31	384
28c781df	385	def __parse_object_colon(self, token, unused_string):
99155935 BP	386	if token == ":":
	387	self.parse_state = Parser.__parse_object_value
	388	else:
	389	self.__error("syntax error parsing object expecting ':'")
26bb0f31	390
99155935 BP	391	def __parse_object_value(self, token, string):
99155935 BP	392	self.__parse_value(token, string, Parser.__parse_object_next)
26bb0f31	393
28c781df	394	def __parse_object_next(self, token, unused_string):
99155935 BP	395	if token == ",":
	396	self.parse_state = Parser.__parse_object_name
	397	elif token == "}":
	398	self.__parser_pop()
	399	else:
	400	self.__error("syntax error expecting '}' or ','")
26bb0f31	401
99155935 BP	402	def __parse_array_init(self, token, string):
	403	if token == ']':
	404	self.__parser_pop()
	405	else:
	406	self.__parse_array_value(token, string)
26bb0f31	407
99155935 BP	408	def __parse_array_value(self, token, string):
99155935 BP	409	self.__parse_value(token, string, Parser.__parse_array_next)
26bb0f31	410
28c781df	411	def __parse_array_next(self, token, unused_string):
99155935 BP	412	if token == ",":
	413	self.parse_state = Parser.__parse_array_value
	414	elif token == "]":
	415	self.__parser_pop()
	416	else:
	417	self.__error("syntax error expecting ']' or ','")
26bb0f31	418
99155935 BP	419	def __parser_input(self, token, string=None):
	420	self.lex_state = Parser.__lex_start
	421	self.buffer = ""
99155935	422	self.parse_state(self, token, string)
99155935 BP	423
	424	def __put_value(self, value):
	425	top = self.stack[-1]
da2d45c6	426	if isinstance(top, dict):
99155935 BP	427	top[self.member_name] = value
	428	else:
	429	top.append(value)
	430
	431	def __parser_push(self, new_json, next_state):
	432	if len(self.stack) < Parser.MAX_HEIGHT:
	433	if len(self.stack) > 0:
	434	self.__put_value(new_json)
	435	self.stack.append(new_json)
	436	self.parse_state = next_state
	437	else:
	438	self.__error("input exceeds maximum nesting depth %d" %
	439	Parser.MAX_HEIGHT)
26bb0f31	440
99155935 BP	441	def __push_object(self):
99155935 BP	442	self.__parser_push({}, Parser.__parse_object_init)
26bb0f31	443
99155935 BP	444	def __push_array(self):
	445	self.__parser_push([], Parser.__parse_array_init)
	446
	447	def __parser_pop(self):
	448	if len(self.stack) == 1:
	449	self.parse_state = Parser.__parse_end
	450	if not self.check_trailer:
	451	self.done = True
	452	else:
	453	self.stack.pop()
	454	top = self.stack[-1]
da2d45c6	455	if isinstance(top, list):
99155935 BP	456	self.parse_state = Parser.__parse_array_next
	457	else:
	458	self.parse_state = Parser.__parse_object_next
	459
	460	def __parse_value(self, token, string, next_state):
0c4d144a	461	number_types = [int]
8f808842 RB	462	number_types.extend([float])
	463	number_types = tuple(number_types)
	464	if token in [False, None, True] or isinstance(token, number_types):
99155935 BP	465	self.__put_value(token)
	466	elif token == 'string':
	467	self.__put_value(string)
	468	else:
	469	if token == '{':
	470	self.__push_object()
	471	elif token == '[':
	472	self.__push_array()
	473	else:
	474	self.__error("syntax error expecting value")
	475	return
	476	self.parse_state = next_state
	477
	478	def __error(self, message):
	479	if self.error is None:
	480	self.error = ("line %d, column %d, byte %d: %s"
	481	% (self.line_number, self.column_number,
	482	self.byte_number, message))
	483	self.done = True
	484
	485	def feed(self, s):
	486	i = 0
	487	while True:
	488	if self.done or i >= len(s):
	489	return i
c640c04f BP	490
	491	c = s[i]
	492	if self.__lex_input(c):
	493	self.byte_number += 1
	494	if c == '\n':
	495	self.column_number = 0
	496	self.line_number += 1
	497	else:
	498	self.column_number += 1
	499
99155935 BP	500	i += 1
	501
	502	def is_done(self):
	503	return self.done
	504
	505	def finish(self):
	506	if self.lex_state == Parser.__lex_start:
	507	pass
	508	elif self.lex_state in (Parser.__lex_string,
	509	Parser.__lex_string_escape):
	510	self.__error("unexpected end of input in quoted string")
	511	else:
	512	self.__lex_input(" ")
	513
	514	if self.parse_state == Parser.__parse_start:
	515	self.__error("empty input stream")
	516	elif self.parse_state != Parser.__parse_end:
	517	self.__error("unexpected end of input")
	518
3c057118	519	if self.error is None:
99155935 BP	520	assert len(self.stack) == 1
	521	return self.stack.pop()
	522	else:
	523	return self.error