"""Implementation of JSONEncoder."""
import re

# The C accelerators are optional: fall back to ``None`` so the pure-Python
# implementations defined below are used when ``_json`` is unavailable.
try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None
# Characters that must be escaped inside a JSON string literal.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same, plus everything outside printable ASCII (used for ASCII-only output).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes >= 0x80 in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a character matched by ESCAPE / ESCAPE_ASCII to its JSON escape.
# The characters named explicitly in ESCAPE get the short two-character form.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character in \x00-\x1f gets a \uXXXX escape;
# setdefault() leaves the short forms above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
# Float formatter used as the ``_repr`` default of JSONEncoder.iterencode's
# ``floatstr`` helper.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.  Characters
    outside the ``ESCAPE`` pattern are passed through unchanged.
    """
    def replace(match):
        # ESCAPE matches exactly one character at a time.
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Characters with an entry in ``ESCAPE_DCT`` use that escape; any other
    character matched by ``ESCAPE_ASCII`` is written as ``\\uXXXX``, using a
    UTF-16 surrogate pair for code points at or above 0x10000.  The result
    is wrapped in double quotes.
    """
    # A byte string containing high bytes is decoded as UTF-8 first so the
    # escapes below are computed per character rather than per byte.
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
# Prefer the C implementation when it was importable; otherwise use the
# pure-Python fallback defined above.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; ``__init__`` overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion.  Otherwise, no such check takes
        place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        # iterencode() reads self.indent, so it must be stored here.
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # ``markers`` maps id(container) -> container for every container
        # currently being encoded; None disables the circular check.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # ``None`` means "do not decode input strings", exactly as in
        # encode(); only a real, non-UTF-8 codec needs the decoding wrapper.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for the one-shot, compact, unsorted
        # case; everything else goes through the pure-Python generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function.

    ``markers`` is a dict used to detect circular references (keyed by
    ``id()`` of each container being encoded) or None to skip the check.
    ``_default`` converts otherwise unsupported objects, ``_encoder``
    encodes strings and ``_floatstr`` encodes floats.  ``_indent`` is the
    per-level indent width or None for compact output.  ``_key_separator``
    and ``_item_separator`` are emitted between key/value and between items.
    ``_sort_keys`` sorts dict items by key; ``_skipkeys`` silently drops
    unsupported keys instead of raising ``TypeError``.  The remaining
    keyword parameters only rebind builtins as fast locals.
    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the JSON array text for a list or tuple, chunk by chunk.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text to emit before the next item: the opening
        # bracket for the first item, the separator for later ones.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the JSON object text for a dict, chunk by chunk.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Dispatch on the type of ``o``; unsupported objects are converted
        # with ``_default`` and the result is encoded recursively.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode