1 """Implementation of JSONEncoder
6 from _json
import encode_basestring_ascii
as c_encode_basestring_ascii
8 c_encode_basestring_ascii
= None
10 from _json
import make_encoder
as c_make_encoder
# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: the C0 control range, backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Backslash, double quote, and anything outside printable ASCII
# (space through tilde); used when the output must be ASCII-only.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects characters in the 0x80-0xff range in a ``str``.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Short escapes for the characters named explicitly in ESCAPE.
# NOTE(review): this literal was lost from the extracted listing; it is
# restored from the ESCAPE character class and the JSON two-character
# escapes -- confirm against the pristine file.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets the generic \uXXXX form;
# setdefault keeps the short escapes registered above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

INFINITY = float('inf')
# NOTE(review): FLOAT_REPR is read by JSONEncoder.iterencode but its
# definition is not visible in the listing; ``repr`` is presumed -- confirm.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Each character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.  Characters
    outside that set are passed through unchanged.
    """
    def replace(match):
        # ESCAPE only ever matches a single character, so group(0) is the
        # exact key to look up.
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Backslash, double quote and every character outside printable ASCII are
    replaced by an escape sequence; code points above 0xFFFF are written as
    a UTF-16 surrogate pair of two ``\\uXXXX`` escapes.  The result is
    wrapped in double quotes.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        # NOTE(review): the body of this branch is missing from the
        # extracted listing; decoding the byte string as UTF-8 is presumed.
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        # NOTE(review): the short-escape lookup is restored, not visible in
        # the listing; without it backslash and quote would be emitted in
        # \uXXXX form instead of \\ and \".
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            # Outside the BMP: split into a high/low surrogate pair.
            n -= 0x10000
            s1 = 0xd800 | ((n >> 10) & 0x3ff)
            s2 = 0xdc00 | (n & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
# Use the C implementation when it is available (it is None otherwise) and
# fall back to the pure-Python encoder.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii
)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ only overrides them when *separators*
    # is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If *ensure_ascii* is true (the default), all non-ASCII
        characters in the output are escaped with \\uXXXX sequences,
        and the results are str instances consisting of ASCII
        characters only.  If ensure_ascii is False, a result may be a
        unicode instance.  This usually happens if the input contains
        unicode strings or the *encoding* parameter is used.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # A dict of in-progress containers when circular checking is on,
        # None to disable the check entirely.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only transcode byte strings for a real, non-default encoding.
        # encoding=None means "do not transcode", matching encode(); the
        # previous ``!= 'utf-8'`` test sent None to str.decode().
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
            # NOTE(review): most of this body is missing from the extracted
            # listing; it is restored from the allow_nan contract documented
            # in __init__ and the surviving error message.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot, non-indented, unsorted
        # output; everything else goes through the Python generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
272 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
,
273 _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
274 ## HACK: hand-optimized bytecode; turn globals into locals
275 ValueError=ValueError,
276 basestring
=basestring
,
281 isinstance=isinstance,
288 def _iterencode_list(lst
, _current_indent_level
):
292 if markers
is not None:
294 if markerid
in markers
:
295 raise ValueError("Circular reference detected")
296 markers
[markerid
] = lst
298 if _indent
is not None:
299 _current_indent_level
+= 1
300 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
301 separator
= _item_separator
+ newline_indent
302 buf
+= newline_indent
304 newline_indent
= None
305 separator
= _item_separator
312 if isinstance(value
, basestring
):
313 yield buf
+ _encoder(value
)
320 elif isinstance(value
, (int, long)):
321 yield buf
+ str(value
)
322 elif isinstance(value
, float):
323 yield buf
+ _floatstr(value
)
326 if isinstance(value
, (list, tuple)):
327 chunks
= _iterencode_list(value
, _current_indent_level
)
328 elif isinstance(value
, dict):
329 chunks
= _iterencode_dict(value
, _current_indent_level
)
331 chunks
= _iterencode(value
, _current_indent_level
)
334 if newline_indent
is not None:
335 _current_indent_level
-= 1
336 yield '\n' + (' ' * (_indent
* _current_indent_level
))
338 if markers
is not None:
339 del markers
[markerid
]
341 def _iterencode_dict(dct
, _current_indent_level
):
345 if markers
is not None:
347 if markerid
in markers
:
348 raise ValueError("Circular reference detected")
349 markers
[markerid
] = dct
351 if _indent
is not None:
352 _current_indent_level
+= 1
353 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
354 item_separator
= _item_separator
+ newline_indent
357 newline_indent
= None
358 item_separator
= _item_separator
361 items
= sorted(dct
.items(), key
=lambda kv
: kv
[0])
363 items
= dct
.iteritems()
364 for key
, value
in items
:
365 if isinstance(key
, basestring
):
367 # JavaScript is weakly typed for these, so it makes sense to
368 # also allow them. Many encoders seem to do something like this.
369 elif isinstance(key
, float):
377 elif isinstance(key
, (int, long)):
382 raise TypeError("key " + repr(key
) + " is not a string")
389 if isinstance(value
, basestring
):
390 yield _encoder(value
)
397 elif isinstance(value
, (int, long)):
399 elif isinstance(value
, float):
400 yield _floatstr(value
)
402 if isinstance(value
, (list, tuple)):
403 chunks
= _iterencode_list(value
, _current_indent_level
)
404 elif isinstance(value
, dict):
405 chunks
= _iterencode_dict(value
, _current_indent_level
)
407 chunks
= _iterencode(value
, _current_indent_level
)
410 if newline_indent
is not None:
411 _current_indent_level
-= 1
412 yield '\n' + (' ' * (_indent
* _current_indent_level
))
414 if markers
is not None:
415 del markers
[markerid
]
417 def _iterencode(o
, _current_indent_level
):
418 if isinstance(o
, basestring
):
426 elif isinstance(o
, (int, long)):
428 elif isinstance(o
, float):
430 elif isinstance(o
, (list, tuple)):
431 for chunk
in _iterencode_list(o
, _current_indent_level
):
433 elif isinstance(o
, dict):
434 for chunk
in _iterencode_dict(o
, _current_indent_level
):
437 if markers
is not None:
439 if markerid
in markers
:
440 raise ValueError("Circular reference detected")
441 markers
[markerid
] = o
443 for chunk
in _iterencode(o
, _current_indent_level
):
445 if markers
is not None:
446 del markers
[markerid
]