]>
Commit | Line | Data |
---|---|---|
3257aa99 DM |
1 | """Implementation of JSONDecoder\r |
2 | """\r | |
3 | import re\r | |
4 | import sys\r | |
5 | import struct\r | |
6 | \r | |
7 | from json import scanner\r | |
8 | try:\r | |
9 | from _json import scanstring as c_scanstring\r | |
10 | except ImportError:\r | |
11 | c_scanstring = None\r | |
12 | \r | |
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Flags passed to every regular expression compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16 | \r | |
17 | def _floatconstants():\r | |
18 | _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')\r | |
19 | if sys.byteorder != 'big':\r | |
20 | _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]\r | |
21 | nan, inf = struct.unpack('dd', _BYTES)\r | |
22 | return nan, inf, -inf\r | |
23 | \r | |
# Special float values, computed once at import time by _floatconstants().
NaN, PosInf, NegInf = _floatconstants()
25 | \r | |
26 | \r | |
def linecol(doc, pos):
    """Translate the offset ``pos`` in ``doc`` into ``(lineno, colno)``.

    Both numbers are 1-based. The line number is one more than the count
    of newlines before ``pos``; the column is the distance from the last
    newline before ``pos`` (or from the start of ``doc`` on the first line).
    """
    newlines_before = doc.count('\n', 0, pos)
    # rfind() yields -1 when no newline precedes pos, which makes the
    # subtraction below come out as pos + 1 for the first line.
    last_newline = doc.rfind('\n', 0, pos)
    return newlines_before + 1, pos - last_newline
34 | \r | |
35 | \r | |
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of an error in ``doc``.

    With only ``pos`` given, the message names a single position (line,
    column and character offset). When ``end`` is also given, the message
    names the range from ``pos`` to ``end``.
    """
    # Note that this function is called from _json
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = ('{0}: line {1} column {2} - line {3} column {4} '
                    '(char {5} - {6})')
        return template.format(msg, start_line, start_col,
                               stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
49 | \r | |
50 | \r | |
# Maps the non-standard constant names to their float values; used as the
# default ``parse_constant`` lookup in JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches a (possibly empty) run of ordinary characters as group 1, followed
# by a single terminator as group 2: a double quote, a backslash, or a
# character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes (the character after a backslash) and the
# character each one decodes to.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding used by py_scanstring when the caller passes encoding=None.
DEFAULT_ENCODING = "utf-8"
64 | \r | |
65 | def _decode_uXXXX(s, pos):\r | |
66 | esc = s[pos + 1:pos + 5]\r | |
67 | if len(esc) == 4 and esc[1] not in 'xX':\r | |
68 | try:\r | |
69 | return int(esc, 16)\r | |
70 | except ValueError:\r | |
71 | pass\r | |
72 | msg = "Invalid \\uXXXX escape"\r | |
73 | raise ValueError(errmsg(msg, s, pos))\r | |
74 | \r | |
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to convert non-unicode chunks of ``s`` to unicode;
    when it is None, DEFAULT_ENCODING is used. ``_b`` and ``_m`` bind the
    escape table and the chunk matcher as default arguments.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote; reported in "Unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            # No quote, backslash or control character before the end of s
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                # Non-strict mode keeps the literal control character
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            # The backslash was the last character of s
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence
            uni = _decode_uXXXX(s, end)
            # Skip the 'u' and its four hex digits
            end += 5
            # Check for surrogate pair on UCS-4 systems
            if sys.maxunicode > 65535 and \
               0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    # Combine the high and low surrogate into one code point
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    # Skip the second escape: backslash, 'u', four digits
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
142 | \r | |
143 | \r | |
# Use speedup if available: c_scanstring is None when the _json import
# failed, in which case the pure-Python scanner is used.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for ``in`` membership tests.
WHITESPACE_STR = ' \t\n\r'
149 | \r | |
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair; ``end`` is expected to index the
    first character after the opening brace (the code looks for a quote,
    whitespace or ``}`` there). Keys are read with ``scanstring`` and
    values with ``scan_once``.

    The result is ``object_pairs_hook(pairs)`` when that hook is given,
    otherwise a dict, passed through ``object_hook`` when that hook is
    given. The returned index is the one after the closing ``}``.

    Raises ValueError when a property name, ``:``, value or ``,`` is
    missing where one is expected.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    # Step over the opening quote of the first key
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace before the value; the regex is only invoked when
        # more than one whitespace character follows the colon
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of s; scan_once below reports the error
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        # Step past the delimiter; error positions below use end - 1
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # Skip whitespace between the comma and the next key
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        # Step over the opening quote of the next key
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
237 | \r | |
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair; ``end`` is expected to index the
    first character after the opening bracket (the code looks for
    whitespace, ``]`` or a value there). Elements are read with
    ``scan_once``. The returned index is the one after the closing ``]``.

    Raises ValueError when a value or a ``,`` delimiter is missing. The
    delimiter error reports the position of the offending character, the
    same convention JSONObject uses for its delimiter error.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        # Step past the delimiter (or closing bracket)
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end already points one past the unexpected character, so
            # back up by one to report where the delimiter was expected
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # Skip whitespace before the next element; the regex is only
        # invoked when more than one whitespace character follows the comma
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of s; scan_once reports the error
            pass

    return values, end
273 | \r | |
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded
    to the corresponding ``float`` values, although they are outside the
    JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        ``encoding`` is the encoding used to interpret ``str`` objects
        decoded by this instance (utf-8 by default). It has no effect on
        ``unicode`` input. Only encodings that are a superset of ASCII
        currently work; text in other encodings should be passed in as
        ``unicode``.

        ``object_hook``, if given, is called with the ``dict`` built for
        every decoded JSON object, and its return value is used in place
        of that ``dict``. This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        ``object_pairs_hook``, if given, is called with an ordered list of
        ``(key, value)`` pairs for every decoded JSON object, and its
        return value is used instead of the ``dict``. This supports
        decoders that depend on key order (for example,
        collections.OrderedDict). When ``object_hook`` is also given,
        ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded; the default is equivalent to float(num_str).
        It can be used to substitute another datatype or parser
        (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded; the default is equivalent to int(num_str).
        It can be used to substitute another datatype or parser
        (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN. It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` (true by default) controls whether control characters
        are rejected inside strings. Control characters here are those
        with character codes in the 0-31 range, including ``'\\t'`` (tab),
        ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # Caller-supplied settings.
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Number and constant converters, with their built-in fallbacks.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__

        # Parsers for the structured types.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        # Created last: make_scanner receives this instance after every
        # attribute above has been assigned.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        Whitespace before and after the document is ignored; anything
        else after the document raises ValueError.

        """
        document, stop = self.raw_decode(s, idx=_w(s, 0).end())
        # Consume trailing whitespace before checking for leftover input.
        stop = _w(s, stop).end()
        total = len(s)
        if stop == total:
            return document
        raise ValueError(errmsg("Extra data", s, stop, total))

    def raw_decode(self, s, idx=0):
        """Decode a JSON document starting at index ``idx`` of ``s`` (a
        ``str`` or ``unicode``) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, this does not complain about extraneous data
        after the document. Raises ValueError when no document can be
        decoded.

        """
        try:
            document, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return document, stop