]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | """Implementation of JSONEncoder\r |
2 | """\r | |
3 | import re\r | |
4 | \r | |
5 | try:\r | |
6 | from _json import encode_basestring_ascii as c_encode_basestring_ascii\r | |
7 | except ImportError:\r | |
8 | c_encode_basestring_ascii = None\r | |
9 | try:\r | |
10 | from _json import make_encoder as c_make_encoder\r | |
11 | except ImportError:\r | |
12 | c_make_encoder = None\r | |
13 | \r | |
# Characters that must be escaped in any JSON string: the ASCII control
# range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and everything outside the printable range ' ' .. '~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes >= 0x80; py_encode_basestring_ascii uses it to decide whether
# a byte string has to be decoded as UTF-8 first.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are listed
# explicitly; the loop below fills in \uXXXX for the remaining control
# characters without overriding the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# float('inf') is spelled out explicitly instead of relying on an overflowing
# literal such as '1e66666' being parsed as infinity.
INFINITY = float('inf')
# Function used to render ordinary (finite) floats.
FLOAT_REPR = repr
33 | \r | |
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Every character matched by ESCAPE (control characters, backslash and
    double quote) is replaced by its entry in ESCAPE_DCT, and the result is
    wrapped in double quotes.  Other characters are passed through unchanged.

    """
    def _escape(found):
        # Look up the escape sequence for the single matched character.
        return ESCAPE_DCT[found.group(0)]

    escaped = ESCAPE.sub(_escape, s)
    return '"' + escaped + '"'
41 | \r | |
42 | \r | |
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    A byte string containing bytes >= 0x80 is decoded as UTF-8 first.  Then
    every backslash, double quote and character outside the printable ASCII
    range is replaced: by its short escape from ESCAPE_DCT when one exists,
    otherwise by a \\uXXXX escape (a surrogate pair for code points at or
    above 0x10000).  The result is wrapped in double quotes.

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')

    def _escape(found):
        char = found.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        code = ord(char)
        if code >= 0x10000:
            # Outside the BMP: emit a UTF-16 surrogate pair.
            offset = code - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(code)

    return '"' + str(ESCAPE_ASCII.sub(_escape, s)) + '"'
66 | \r | |
67 | \r | |
# Use the C implementation of the ASCII string escaper when the _json import
# succeeded; otherwise fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
70 | \r | |
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  If encoding is None, byte strings are handed
        to the string encoder without being decoded here.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # Decode here only for an explicit non-UTF-8 encoding;
                # otherwise the byte string goes to the string encoder as is.
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; when it is
        true and the C encoder is usable, the C encoder is used instead of
        the pure-Python generator.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        _encoding = self.encoding
        # Wrap the string encoder only for an explicit, non-UTF-8 encoding.
        # ``None`` means "do not decode" (matching encode()); previously it
        # reached ``o.decode(None)`` and failed for byte strings.
        if _encoding is not None and _encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=_encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            # Only the three special values reach this point.
            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is chosen only for one-shot encoding without
        # indentation or key sorting; everything else uses the pure-Python
        # generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
265 | \r | |
266 | def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,\r | |
267 | _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,\r | |
268 | ## HACK: hand-optimized bytecode; turn globals into locals\r | |
269 | ValueError=ValueError,\r | |
270 | basestring=basestring,\r | |
271 | dict=dict,\r | |
272 | float=float,\r | |
273 | id=id,\r | |
274 | int=int,\r | |
275 | isinstance=isinstance,\r | |
276 | list=list,\r | |
277 | long=long,\r | |
278 | str=str,\r | |
279 | tuple=tuple,\r | |
280 | ):\r | |
281 | \r | |
282 | def _iterencode_list(lst, _current_indent_level):\r | |
283 | if not lst:\r | |
284 | yield '[]'\r | |
285 | return\r | |
286 | if markers is not None:\r | |
287 | markerid = id(lst)\r | |
288 | if markerid in markers:\r | |
289 | raise ValueError("Circular reference detected")\r | |
290 | markers[markerid] = lst\r | |
291 | buf = '['\r | |
292 | if _indent is not None:\r | |
293 | _current_indent_level += 1\r | |
294 | newline_indent = '\n' + (' ' * (_indent * _current_indent_level))\r | |
295 | separator = _item_separator + newline_indent\r | |
296 | buf += newline_indent\r | |
297 | else:\r | |
298 | newline_indent = None\r | |
299 | separator = _item_separator\r | |
300 | first = True\r | |
301 | for value in lst:\r | |
302 | if first:\r | |
303 | first = False\r | |
304 | else:\r | |
305 | buf = separator\r | |
306 | if isinstance(value, basestring):\r | |
307 | yield buf + _encoder(value)\r | |
308 | elif value is None:\r | |
309 | yield buf + 'null'\r | |
310 | elif value is True:\r | |
311 | yield buf + 'true'\r | |
312 | elif value is False:\r | |
313 | yield buf + 'false'\r | |
314 | elif isinstance(value, (int, long)):\r | |
315 | yield buf + str(value)\r | |
316 | elif isinstance(value, float):\r | |
317 | yield buf + _floatstr(value)\r | |
318 | else:\r | |
319 | yield buf\r | |
320 | if isinstance(value, (list, tuple)):\r | |
321 | chunks = _iterencode_list(value, _current_indent_level)\r | |
322 | elif isinstance(value, dict):\r | |
323 | chunks = _iterencode_dict(value, _current_indent_level)\r | |
324 | else:\r | |
325 | chunks = _iterencode(value, _current_indent_level)\r | |
326 | for chunk in chunks:\r | |
327 | yield chunk\r | |
328 | if newline_indent is not None:\r | |
329 | _current_indent_level -= 1\r | |
330 | yield '\n' + (' ' * (_indent * _current_indent_level))\r | |
331 | yield ']'\r | |
332 | if markers is not None:\r | |
333 | del markers[markerid]\r | |
334 | \r | |
335 | def _iterencode_dict(dct, _current_indent_level):\r | |
336 | if not dct:\r | |
337 | yield '{}'\r | |
338 | return\r | |
339 | if markers is not None:\r | |
340 | markerid = id(dct)\r | |
341 | if markerid in markers:\r | |
342 | raise ValueError("Circular reference detected")\r | |
343 | markers[markerid] = dct\r | |
344 | yield '{'\r | |
345 | if _indent is not None:\r | |
346 | _current_indent_level += 1\r | |
347 | newline_indent = '\n' + (' ' * (_indent * _current_indent_level))\r | |
348 | item_separator = _item_separator + newline_indent\r | |
349 | yield newline_indent\r | |
350 | else:\r | |
351 | newline_indent = None\r | |
352 | item_separator = _item_separator\r | |
353 | first = True\r | |
354 | if _sort_keys:\r | |
355 | items = sorted(dct.items(), key=lambda kv: kv[0])\r | |
356 | else:\r | |
357 | items = dct.iteritems()\r | |
358 | for key, value in items:\r | |
359 | if isinstance(key, basestring):\r | |
360 | pass\r | |
361 | # JavaScript is weakly typed for these, so it makes sense to\r | |
362 | # also allow them. Many encoders seem to do something like this.\r | |
363 | elif isinstance(key, float):\r | |
364 | key = _floatstr(key)\r | |
365 | elif key is True:\r | |
366 | key = 'true'\r | |
367 | elif key is False:\r | |
368 | key = 'false'\r | |
369 | elif key is None:\r | |
370 | key = 'null'\r | |
371 | elif isinstance(key, (int, long)):\r | |
372 | key = str(key)\r | |
373 | elif _skipkeys:\r | |
374 | continue\r | |
375 | else:\r | |
376 | raise TypeError("key " + repr(key) + " is not a string")\r | |
377 | if first:\r | |
378 | first = False\r | |
379 | else:\r | |
380 | yield item_separator\r | |
381 | yield _encoder(key)\r | |
382 | yield _key_separator\r | |
383 | if isinstance(value, basestring):\r | |
384 | yield _encoder(value)\r | |
385 | elif value is None:\r | |
386 | yield 'null'\r | |
387 | elif value is True:\r | |
388 | yield 'true'\r | |
389 | elif value is False:\r | |
390 | yield 'false'\r | |
391 | elif isinstance(value, (int, long)):\r | |
392 | yield str(value)\r | |
393 | elif isinstance(value, float):\r | |
394 | yield _floatstr(value)\r | |
395 | else:\r | |
396 | if isinstance(value, (list, tuple)):\r | |
397 | chunks = _iterencode_list(value, _current_indent_level)\r | |
398 | elif isinstance(value, dict):\r | |
399 | chunks = _iterencode_dict(value, _current_indent_level)\r | |
400 | else:\r | |
401 | chunks = _iterencode(value, _current_indent_level)\r | |
402 | for chunk in chunks:\r | |
403 | yield chunk\r | |
404 | if newline_indent is not None:\r | |
405 | _current_indent_level -= 1\r | |
406 | yield '\n' + (' ' * (_indent * _current_indent_level))\r | |
407 | yield '}'\r | |
408 | if markers is not None:\r | |
409 | del markers[markerid]\r | |
410 | \r | |
411 | def _iterencode(o, _current_indent_level):\r | |
412 | if isinstance(o, basestring):\r | |
413 | yield _encoder(o)\r | |
414 | elif o is None:\r | |
415 | yield 'null'\r | |
416 | elif o is True:\r | |
417 | yield 'true'\r | |
418 | elif o is False:\r | |
419 | yield 'false'\r | |
420 | elif isinstance(o, (int, long)):\r | |
421 | yield str(o)\r | |
422 | elif isinstance(o, float):\r | |
423 | yield _floatstr(o)\r | |
424 | elif isinstance(o, (list, tuple)):\r | |
425 | for chunk in _iterencode_list(o, _current_indent_level):\r | |
426 | yield chunk\r | |
427 | elif isinstance(o, dict):\r | |
428 | for chunk in _iterencode_dict(o, _current_indent_level):\r | |
429 | yield chunk\r | |
430 | else:\r | |
431 | if markers is not None:\r | |
432 | markerid = id(o)\r | |
433 | if markerid in markers:\r | |
434 | raise ValueError("Circular reference detected")\r | |
435 | markers[markerid] = o\r | |
436 | o = _default(o)\r | |
437 | for chunk in _iterencode(o, _current_indent_level):\r | |
438 | yield chunk\r | |
439 | if markers is not None:\r | |
440 | del markers[markerid]\r | |
441 | \r | |
442 | return _iterencode\r |