2 * multibytecodec.c: Common Multibyte Codec Implementation
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
7 #define PY_SSIZE_T_CLEAN
9 #include "structmember.h"
10 #include "multibytecodec.h"
13 const Py_UNICODE
*inbuf
, *inbuf_top
, *inbuf_end
;
14 unsigned char *outbuf
, *outbuf_end
;
15 PyObject
*excobj
, *outobj
;
16 } MultibyteEncodeBuffer
;
19 const unsigned char *inbuf
, *inbuf_top
, *inbuf_end
;
20 Py_UNICODE
*outbuf
, *outbuf_end
;
21 PyObject
*excobj
, *outobj
;
22 } MultibyteDecodeBuffer
;
24 PyDoc_STRVAR(MultibyteCodec_Encode__doc__
,
25 "I.encode(unicode[, errors]) -> (string, length consumed)\n\
27 Return an encoded string version of `unicode'. errors may be given to\n\
28 set a different error handling scheme. Default is 'strict' meaning that\n\
29 encoding errors raise a UnicodeEncodeError. Other possible values are\n\
30 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
31 registered with codecs.register_error that can handle UnicodeEncodeErrors.");
33 PyDoc_STRVAR(MultibyteCodec_Decode__doc__
,
34 "I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
36 Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
37 to set a different error handling scheme. Default is 'strict' meaning\n\
38 that encoding errors raise a UnicodeDecodeError. Other possible values\n\
39 are 'ignore' and 'replace' as well as any other name registered with\n\
40 codecs.register_error that is able to handle UnicodeDecodeErrors.");
/* Keyword-name tables handed to PyArg_ParseTupleAndKeywords in this file.
 * Each list is NULL-terminated. */
/* (input, errors): used by MultibyteCodec_Encode / MultibyteCodec_Decode. */
42 static char *codeckwarglist
[] = {"input", "errors", NULL
};
/* (errors): used by the incremental encoder/decoder constructors. */
43 static char *incnewkwarglist
[] = {"errors", NULL
};
/* (input, final): used by the incremental encode()/decode() methods. */
44 static char *incrementalkwarglist
[] = {"input", "final", NULL
};
/* (stream, errors): used by the stream reader constructor. */
45 static char *streamkwarglist
[] = {"stream", "errors", NULL
};
47 static PyObject
*multibytecodec_encode(MultibyteCodec
*,
48 MultibyteCodec_State
*, const Py_UNICODE
**, Py_ssize_t
,
51 #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
/* make_tuple: store `object` in slot 0 and the integer `len` (converted with
 * PyInt_FromSsize_t) in slot 1 of the tuple `v`.
 * NOTE(review): the allocation of `v`, the NULL checks and the return
 * statement are not visible in this excerpt -- presumably `v` is a fresh
 * 2-tuple that is returned to the caller; confirm against the full file. */
54 make_tuple(PyObject
*object
, Py_ssize_t len
)
/* PyTuple_SET_ITEM stores the pointer as-is (no new reference is taken),
 * so the reference to `object` held by the caller ends up owned by `v`. */
66 PyTuple_SET_ITEM(v
, 0, object
);
68 w
= PyInt_FromSsize_t(len
);
73 PyTuple_SET_ITEM(v
, 1, w
);
/* internal_error_callback: translate an error-scheme name into the object
 * used internally to represent it.
 *   errors == NULL or "strict", "ignore", "replace" each take a dedicated
 *   branch; any other name is returned as a new Python string.
 * NOTE(review): the values returned by the three dedicated branches are not
 * visible in this excerpt -- presumably the ERROR_STRICT / ERROR_IGNORE /
 * ERROR_REPLACE sentinels compared against elsewhere in this file; confirm. */
79 internal_error_callback(const char *errors
)
81 if (errors
== NULL
|| strcmp(errors
, "strict") == 0)
83 else if (strcmp(errors
, "ignore") == 0)
85 else if (strcmp(errors
, "replace") == 0)
/* Custom handler name: keep it as a string object so it can be looked up
 * later via the codec error registry. */
88 return PyString_FromString(errors
);
/* call_error_callback: invoke the registered codec error handler named by
 * the string object `errors`, passing the exception object `exc` as its
 * single positional argument.
 * NOTE(review): error checks, reference-count cleanup and the return
 * statement are not visible in this excerpt; presumably the handler's
 * result `r` is returned -- confirm. */
92 call_error_callback(PyObject
*errors
, PyObject
*exc
)
94 PyObject
*args
, *cb
, *r
;
/* Only custom (string-named) handlers may reach this function. */
96 assert(PyString_Check(errors
));
/* Resolve the handler callable from the name registered with
 * codecs.register_error. */
97 cb
= PyCodec_LookupError(PyString_AS_STRING(errors
));
101 args
= PyTuple_New(1);
/* PyTuple_SET_ITEM does not take a new reference to `exc`.
 * NOTE(review): an INCREF of `exc` is presumably done on a line not shown
 * here -- verify. */
107 PyTuple_SET_ITEM(args
, 0, exc
);
110 r
= PyObject_CallObject(cb
, args
);
117 codecctx_errors_get(MultibyteStatefulCodecContext
*self
)
121 if (self
->errors
== ERROR_STRICT
)
123 else if (self
->errors
== ERROR_IGNORE
)
125 else if (self
->errors
== ERROR_REPLACE
)
128 Py_INCREF(self
->errors
);
132 return PyString_FromString(errors
);
136 codecctx_errors_set(MultibyteStatefulCodecContext
*self
, PyObject
*value
,
141 if (!PyString_Check(value
)) {
142 PyErr_SetString(PyExc_TypeError
, "errors must be a string");
146 cb
= internal_error_callback(PyString_AS_STRING(value
));
150 ERROR_DECREF(self
->errors
);
155 /* This list of getset handlers is shared by all the stateful codec objects */
156 static PyGetSetDef codecctx_getsets
[] = {
157 {"errors", (getter
)codecctx_errors_get
,
158 (setter
)codecctx_errors_set
,
159 PyDoc_STR("how to treat errors")},
/* expand_encodebuffer: grow the output string of an encode buffer.
 *   buf   - encode buffer whose outobj (a Python string) is resized
 *   esize - minimum number of extra bytes requested; callers in this file
 *           also pass -1 to mean "just grow"
 * The resize call's -1 result is tested for failure; the values this
 * function itself returns are on lines not visible in this excerpt. */
164 expand_encodebuffer(MultibyteEncodeBuffer
*buf
, Py_ssize_t esize
)
166 Py_ssize_t orgpos
, orgsize
, incsize
;
/* Remember the write cursor as an offset: the resize below is given
 * &buf->outobj, so the raw pointers are recomputed afterwards. */
168 orgpos
= (Py_ssize_t
)((char *)buf
->outbuf
-
169 PyString_AS_STRING(buf
->outobj
));
170 orgsize
= PyString_GET_SIZE(buf
->outobj
);
/* Growth policy: add at least half of the current size (forced odd, hence
 * non-zero, by "| 1") unless the caller asked for more than that. */
171 incsize
= (esize
< (orgsize
>> 1) ? (orgsize
>> 1) | 1 : esize
);
/* Guard against Py_ssize_t overflow of orgsize + incsize. */
173 if (orgsize
> PY_SSIZE_T_MAX
- incsize
) {
178 if (_PyString_Resize(&buf
->outobj
, orgsize
+ incsize
) == -1)
/* Rebase the cursor and the end pointer onto the resized string. */
181 buf
->outbuf
= (unsigned char *)PyString_AS_STRING(buf
->outobj
) +orgpos
;
182 buf
->outbuf_end
= (unsigned char *)PyString_AS_STRING(buf
->outobj
)
183 + PyString_GET_SIZE(buf
->outobj
);
187 #define REQUIRE_ENCODEBUFFER(buf, s) do { \
188 if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) \
189 if (expand_encodebuffer(buf, s) == -1) \
/* expand_decodebuffer: grow the output unicode object of a decode buffer.
 *   buf   - decode buffer whose outobj (a unicode object) is resized
 *   esize - minimum number of extra Py_UNICODE units requested; callers in
 *           this file also pass -1 to mean "just grow"
 * The resize call's -1 result is tested for failure; the values this
 * function itself returns are on lines not visible in this excerpt. */
194 expand_decodebuffer(MultibyteDecodeBuffer
*buf
, Py_ssize_t esize
)
196 Py_ssize_t orgpos
, orgsize
;
/* Keep the write cursor as an element offset so it can be rebased after
 * the resize. */
198 orgpos
= (Py_ssize_t
)(buf
->outbuf
- PyUnicode_AS_UNICODE(buf
->outobj
));
199 orgsize
= PyUnicode_GET_SIZE(buf
->outobj
);
/* Same growth policy as expand_encodebuffer: at least half of the current
 * size (forced odd by "| 1") or `esize`, whichever is larger.
 * NOTE(review): unlike expand_encodebuffer, no PY_SSIZE_T_MAX overflow
 * guard on the new size is visible here -- confirm whether one is needed. */
200 if (PyUnicode_Resize(&buf
->outobj
, orgsize
+ (
201 esize
< (orgsize
>> 1) ? (orgsize
>> 1) | 1 : esize
)) == -1)
/* Rebase the cursor and the end pointer onto the resized object. */
204 buf
->outbuf
= PyUnicode_AS_UNICODE(buf
->outobj
) + orgpos
;
205 buf
->outbuf_end
= PyUnicode_AS_UNICODE(buf
->outobj
)
206 + PyUnicode_GET_SIZE(buf
->outobj
);
210 #define REQUIRE_DECODEBUFFER(buf, s) do { \
211 if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) \
212 if (expand_decodebuffer(buf, s) == -1) \
218 * MultibyteCodec object
222 multibytecodec_encerror(MultibyteCodec
*codec
,
223 MultibyteCodec_State
*state
,
224 MultibyteEncodeBuffer
*buf
,
225 PyObject
*errors
, Py_ssize_t e
)
227 PyObject
*retobj
= NULL
, *retstr
= NULL
, *tobj
;
228 Py_ssize_t retstrsize
, newpos
;
229 Py_ssize_t esize
, start
, end
;
233 reason
= "illegal multibyte sequence";
239 REQUIRE_ENCODEBUFFER(buf
, -1);
240 return 0; /* retry it */
242 reason
= "incomplete multibyte sequence";
243 esize
= (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf
);
246 PyErr_SetString(PyExc_RuntimeError
,
247 "internal codec error");
250 PyErr_SetString(PyExc_RuntimeError
,
251 "unknown runtime error");
256 if (errors
== ERROR_REPLACE
) {
257 const Py_UNICODE replchar
= '?', *inbuf
= &replchar
;
263 outleft
= (Py_ssize_t
)(buf
->outbuf_end
- buf
->outbuf
);
264 r
= codec
->encode(state
, codec
->config
, &inbuf
, 1,
265 &buf
->outbuf
, outleft
, 0);
266 if (r
== MBERR_TOOSMALL
) {
267 REQUIRE_ENCODEBUFFER(buf
, -1);
275 REQUIRE_ENCODEBUFFER(buf
, 1);
276 *buf
->outbuf
++ = '?';
279 if (errors
== ERROR_IGNORE
|| errors
== ERROR_REPLACE
) {
284 start
= (Py_ssize_t
)(buf
->inbuf
- buf
->inbuf_top
);
287 /* use cached exception object if available */
288 if (buf
->excobj
== NULL
) {
289 buf
->excobj
= PyUnicodeEncodeError_Create(codec
->encoding
,
291 buf
->inbuf_end
- buf
->inbuf_top
,
293 if (buf
->excobj
== NULL
)
297 if (PyUnicodeEncodeError_SetStart(buf
->excobj
, start
) != 0 ||
298 PyUnicodeEncodeError_SetEnd(buf
->excobj
, end
) != 0 ||
299 PyUnicodeEncodeError_SetReason(buf
->excobj
, reason
) != 0)
302 if (errors
== ERROR_STRICT
) {
303 PyCodec_StrictErrors(buf
->excobj
);
307 retobj
= call_error_callback(errors
, buf
->excobj
);
311 if (!PyTuple_Check(retobj
) || PyTuple_GET_SIZE(retobj
) != 2 ||
312 !PyUnicode_Check((tobj
= PyTuple_GET_ITEM(retobj
, 0))) ||
313 !(PyInt_Check(PyTuple_GET_ITEM(retobj
, 1)) ||
314 PyLong_Check(PyTuple_GET_ITEM(retobj
, 1)))) {
315 PyErr_SetString(PyExc_TypeError
,
316 "encoding error handler must return "
317 "(unicode, int) tuple");
322 const Py_UNICODE
*uraw
= PyUnicode_AS_UNICODE(tobj
);
324 retstr
= multibytecodec_encode(codec
, state
, &uraw
,
325 PyUnicode_GET_SIZE(tobj
), ERROR_STRICT
,
331 retstrsize
= PyString_GET_SIZE(retstr
);
332 if (retstrsize
> 0) {
333 REQUIRE_ENCODEBUFFER(buf
, retstrsize
);
334 memcpy(buf
->outbuf
, PyString_AS_STRING(retstr
), retstrsize
);
335 buf
->outbuf
+= retstrsize
;
338 newpos
= PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj
, 1));
339 if (newpos
< 0 && !PyErr_Occurred())
340 newpos
+= (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf_top
);
341 if (newpos
< 0 || buf
->inbuf_top
+ newpos
> buf
->inbuf_end
) {
343 PyErr_Format(PyExc_IndexError
,
344 "position %zd from error handler out of bounds",
348 buf
->inbuf
= buf
->inbuf_top
+ newpos
;
361 multibytecodec_decerror(MultibyteCodec
*codec
,
362 MultibyteCodec_State
*state
,
363 MultibyteDecodeBuffer
*buf
,
364 PyObject
*errors
, Py_ssize_t e
)
366 PyObject
*retobj
= NULL
, *retuni
= NULL
;
367 Py_ssize_t retunisize
, newpos
;
369 Py_ssize_t esize
, start
, end
;
372 reason
= "illegal multibyte sequence";
378 REQUIRE_DECODEBUFFER(buf
, -1);
379 return 0; /* retry it */
381 reason
= "incomplete multibyte sequence";
382 esize
= (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf
);
385 PyErr_SetString(PyExc_RuntimeError
,
386 "internal codec error");
389 PyErr_SetString(PyExc_RuntimeError
,
390 "unknown runtime error");
395 if (errors
== ERROR_REPLACE
) {
396 REQUIRE_DECODEBUFFER(buf
, 1);
397 *buf
->outbuf
++ = Py_UNICODE_REPLACEMENT_CHARACTER
;
399 if (errors
== ERROR_IGNORE
|| errors
== ERROR_REPLACE
) {
404 start
= (Py_ssize_t
)(buf
->inbuf
- buf
->inbuf_top
);
407 /* use cached exception object if available */
408 if (buf
->excobj
== NULL
) {
409 buf
->excobj
= PyUnicodeDecodeError_Create(codec
->encoding
,
410 (const char *)buf
->inbuf_top
,
411 (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf_top
),
413 if (buf
->excobj
== NULL
)
417 if (PyUnicodeDecodeError_SetStart(buf
->excobj
, start
) ||
418 PyUnicodeDecodeError_SetEnd(buf
->excobj
, end
) ||
419 PyUnicodeDecodeError_SetReason(buf
->excobj
, reason
))
422 if (errors
== ERROR_STRICT
) {
423 PyCodec_StrictErrors(buf
->excobj
);
427 retobj
= call_error_callback(errors
, buf
->excobj
);
431 if (!PyTuple_Check(retobj
) || PyTuple_GET_SIZE(retobj
) != 2 ||
432 !PyUnicode_Check((retuni
= PyTuple_GET_ITEM(retobj
, 0))) ||
433 !(PyInt_Check(PyTuple_GET_ITEM(retobj
, 1)) ||
434 PyLong_Check(PyTuple_GET_ITEM(retobj
, 1)))) {
435 PyErr_SetString(PyExc_TypeError
,
436 "decoding error handler must return "
437 "(unicode, int) tuple");
441 retunisize
= PyUnicode_GET_SIZE(retuni
);
442 if (retunisize
> 0) {
443 REQUIRE_DECODEBUFFER(buf
, retunisize
);
444 memcpy((char *)buf
->outbuf
, PyUnicode_AS_DATA(retuni
),
445 retunisize
* Py_UNICODE_SIZE
);
446 buf
->outbuf
+= retunisize
;
449 newpos
= PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj
, 1));
450 if (newpos
< 0 && !PyErr_Occurred())
451 newpos
+= (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf_top
);
452 if (newpos
< 0 || buf
->inbuf_top
+ newpos
> buf
->inbuf_end
) {
454 PyErr_Format(PyExc_IndexError
,
455 "position %zd from error handler out of bounds",
459 buf
->inbuf
= buf
->inbuf_top
+ newpos
;
469 multibytecodec_encode(MultibyteCodec
*codec
,
470 MultibyteCodec_State
*state
,
471 const Py_UNICODE
**data
, Py_ssize_t datalen
,
472 PyObject
*errors
, int flags
)
474 MultibyteEncodeBuffer buf
;
475 Py_ssize_t finalsize
, r
= 0;
477 if (datalen
== 0 && !(flags
& MBENC_RESET
))
478 return PyString_FromString("");
481 buf
.inbuf
= buf
.inbuf_top
= *data
;
482 buf
.inbuf_end
= buf
.inbuf_top
+ datalen
;
484 if (datalen
> (PY_SSIZE_T_MAX
- 16) / 2) {
489 buf
.outobj
= PyString_FromStringAndSize(NULL
, datalen
* 2 + 16);
490 if (buf
.outobj
== NULL
)
492 buf
.outbuf
= (unsigned char *)PyString_AS_STRING(buf
.outobj
);
493 buf
.outbuf_end
= buf
.outbuf
+ PyString_GET_SIZE(buf
.outobj
);
495 while (buf
.inbuf
< buf
.inbuf_end
) {
496 Py_ssize_t inleft
, outleft
;
498 /* we don't reuse inleft and outleft here.
499 * error callbacks can relocate the cursor anywhere on buffer*/
500 inleft
= (Py_ssize_t
)(buf
.inbuf_end
- buf
.inbuf
);
501 outleft
= (Py_ssize_t
)(buf
.outbuf_end
- buf
.outbuf
);
502 r
= codec
->encode(state
, codec
->config
, &buf
.inbuf
, inleft
,
503 &buf
.outbuf
, outleft
, flags
);
504 if ((r
== 0) || (r
== MBERR_TOOFEW
&& !(flags
& MBENC_FLUSH
)))
506 else if (multibytecodec_encerror(codec
, state
, &buf
, errors
,r
))
508 else if (r
== MBERR_TOOFEW
)
512 if (codec
->encreset
!= NULL
&& (flags
& MBENC_RESET
))
516 outleft
= (Py_ssize_t
)(buf
.outbuf_end
- buf
.outbuf
);
517 r
= codec
->encreset(state
, codec
->config
, &buf
.outbuf
,
521 else if (multibytecodec_encerror(codec
, state
,
526 finalsize
= (Py_ssize_t
)((char *)buf
.outbuf
-
527 PyString_AS_STRING(buf
.outobj
));
529 if (finalsize
!= PyString_GET_SIZE(buf
.outobj
))
530 if (_PyString_Resize(&buf
.outobj
, finalsize
) == -1)
534 Py_XDECREF(buf
.excobj
);
538 Py_XDECREF(buf
.excobj
);
539 Py_XDECREF(buf
.outobj
);
544 MultibyteCodec_Encode(MultibyteCodecObject
*self
,
545 PyObject
*args
, PyObject
*kwargs
)
547 MultibyteCodec_State state
;
549 PyObject
*errorcb
, *r
, *arg
, *ucvt
;
550 const char *errors
= NULL
;
553 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|z:encode",
554 codeckwarglist
, &arg
, &errors
))
557 if (PyUnicode_Check(arg
))
560 arg
= ucvt
= PyObject_Unicode(arg
);
563 else if (!PyUnicode_Check(arg
)) {
564 PyErr_SetString(PyExc_TypeError
,
565 "couldn't convert the object to unicode.");
571 data
= PyUnicode_AS_UNICODE(arg
);
572 datalen
= PyUnicode_GET_SIZE(arg
);
574 errorcb
= internal_error_callback(errors
);
575 if (errorcb
== NULL
) {
580 if (self
->codec
->encinit
!= NULL
&&
581 self
->codec
->encinit(&state
, self
->codec
->config
) != 0)
583 r
= multibytecodec_encode(self
->codec
, &state
,
584 (const Py_UNICODE
**)&data
, datalen
, errorcb
,
585 MBENC_FLUSH
| MBENC_RESET
);
589 ERROR_DECREF(errorcb
);
591 return make_tuple(r
, datalen
);
594 ERROR_DECREF(errorcb
);
600 MultibyteCodec_Decode(MultibyteCodecObject
*self
,
601 PyObject
*args
, PyObject
*kwargs
)
603 MultibyteCodec_State state
;
604 MultibyteDecodeBuffer buf
;
607 const char *data
, *errors
= NULL
;
608 Py_ssize_t datalen
, finalsize
;
610 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s*|z:decode",
611 codeckwarglist
, &pdata
, &errors
))
616 errorcb
= internal_error_callback(errors
);
617 if (errorcb
== NULL
) {
618 PyBuffer_Release(&pdata
);
623 PyBuffer_Release(&pdata
);
624 ERROR_DECREF(errorcb
);
625 return make_tuple(PyUnicode_FromUnicode(NULL
, 0), 0);
629 buf
.inbuf
= buf
.inbuf_top
= (unsigned char *)data
;
630 buf
.inbuf_end
= buf
.inbuf_top
+ datalen
;
631 buf
.outobj
= PyUnicode_FromUnicode(NULL
, datalen
);
632 if (buf
.outobj
== NULL
)
634 buf
.outbuf
= PyUnicode_AS_UNICODE(buf
.outobj
);
635 buf
.outbuf_end
= buf
.outbuf
+ PyUnicode_GET_SIZE(buf
.outobj
);
637 if (self
->codec
->decinit
!= NULL
&&
638 self
->codec
->decinit(&state
, self
->codec
->config
) != 0)
641 while (buf
.inbuf
< buf
.inbuf_end
) {
642 Py_ssize_t inleft
, outleft
, r
;
644 inleft
= (Py_ssize_t
)(buf
.inbuf_end
- buf
.inbuf
);
645 outleft
= (Py_ssize_t
)(buf
.outbuf_end
- buf
.outbuf
);
647 r
= self
->codec
->decode(&state
, self
->codec
->config
,
648 &buf
.inbuf
, inleft
, &buf
.outbuf
, outleft
);
651 else if (multibytecodec_decerror(self
->codec
, &state
,
656 finalsize
= (Py_ssize_t
)(buf
.outbuf
-
657 PyUnicode_AS_UNICODE(buf
.outobj
));
659 if (finalsize
!= PyUnicode_GET_SIZE(buf
.outobj
))
660 if (PyUnicode_Resize(&buf
.outobj
, finalsize
) == -1)
663 PyBuffer_Release(&pdata
);
664 Py_XDECREF(buf
.excobj
);
665 ERROR_DECREF(errorcb
);
666 return make_tuple(buf
.outobj
, datalen
);
669 PyBuffer_Release(&pdata
);
670 ERROR_DECREF(errorcb
);
671 Py_XDECREF(buf
.excobj
);
672 Py_XDECREF(buf
.outobj
);
677 static struct PyMethodDef multibytecodec_methods
[] = {
678 {"encode", (PyCFunction
)MultibyteCodec_Encode
,
679 METH_VARARGS
| METH_KEYWORDS
,
680 MultibyteCodec_Encode__doc__
},
681 {"decode", (PyCFunction
)MultibyteCodec_Decode
,
682 METH_VARARGS
| METH_KEYWORDS
,
683 MultibyteCodec_Decode__doc__
},
688 multibytecodec_dealloc(MultibyteCodecObject
*self
)
693 static PyTypeObject MultibyteCodec_Type
= {
694 PyVarObject_HEAD_INIT(NULL
, 0)
695 "MultibyteCodec", /* tp_name */
696 sizeof(MultibyteCodecObject
), /* tp_basicsize */
699 (destructor
)multibytecodec_dealloc
, /* tp_dealloc */
705 0, /* tp_as_number */
706 0, /* tp_as_sequence */
707 0, /* tp_as_mapping */
711 PyObject_GenericGetAttr
, /* tp_getattro */
713 0, /* tp_as_buffer */
714 Py_TPFLAGS_DEFAULT
, /* tp_flags */
718 0, /* tp_richcompare */
719 0, /* tp_weaklistoffset */
722 multibytecodec_methods
, /* tp_methods */
727 * Utility functions for stateful codec mechanism
/* Cast helpers: reinterpret a stateful codec object pointer as the shared
 * decoder / encoder context struct expected by the helper functions. */
730 #define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o))
731 #define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o))
734 encoder_encode_stateful(MultibyteStatefulEncoderContext
*ctx
,
735 PyObject
*unistr
, int final
)
737 PyObject
*ucvt
, *r
= NULL
;
738 Py_UNICODE
*inbuf
, *inbuf_end
, *inbuf_tmp
= NULL
;
739 Py_ssize_t datalen
, origpending
;
741 if (PyUnicode_Check(unistr
))
744 unistr
= ucvt
= PyObject_Unicode(unistr
);
747 else if (!PyUnicode_Check(unistr
)) {
748 PyErr_SetString(PyExc_TypeError
,
749 "couldn't convert the object to unicode.");
755 datalen
= PyUnicode_GET_SIZE(unistr
);
756 origpending
= ctx
->pendingsize
;
758 if (origpending
> 0) {
759 if (datalen
> PY_SSIZE_T_MAX
- ctx
->pendingsize
) {
761 /* inbuf_tmp == NULL */
764 inbuf_tmp
= PyMem_New(Py_UNICODE
, datalen
+ ctx
->pendingsize
);
765 if (inbuf_tmp
== NULL
)
767 memcpy(inbuf_tmp
, ctx
->pending
,
768 Py_UNICODE_SIZE
* ctx
->pendingsize
);
769 memcpy(inbuf_tmp
+ ctx
->pendingsize
,
770 PyUnicode_AS_UNICODE(unistr
),
771 Py_UNICODE_SIZE
* datalen
);
772 datalen
+= ctx
->pendingsize
;
773 ctx
->pendingsize
= 0;
777 inbuf
= (Py_UNICODE
*)PyUnicode_AS_UNICODE(unistr
);
779 inbuf_end
= inbuf
+ datalen
;
781 r
= multibytecodec_encode(ctx
->codec
, &ctx
->state
,
782 (const Py_UNICODE
**)&inbuf
, datalen
,
783 ctx
->errors
, final
? MBENC_FLUSH
| MBENC_RESET
: 0);
785 /* recover the original pending buffer */
787 memcpy(ctx
->pending
, inbuf_tmp
,
788 Py_UNICODE_SIZE
* origpending
);
789 ctx
->pendingsize
= origpending
;
793 if (inbuf
< inbuf_end
) {
794 ctx
->pendingsize
= (Py_ssize_t
)(inbuf_end
- inbuf
);
795 if (ctx
->pendingsize
> MAXENCPENDING
) {
796 /* normal codecs can't reach here */
797 ctx
->pendingsize
= 0;
798 PyErr_SetString(PyExc_UnicodeError
,
799 "pending buffer overflow");
802 memcpy(ctx
->pending
, inbuf
,
803 ctx
->pendingsize
* Py_UNICODE_SIZE
);
806 if (inbuf_tmp
!= NULL
)
807 PyMem_Del(inbuf_tmp
);
812 if (inbuf_tmp
!= NULL
)
813 PyMem_Del(inbuf_tmp
);
/* decoder_append_pending: append the bytes that the decoder has not yet
 * consumed (buf->inbuf .. buf->inbuf_end) to the context's pending buffer.
 * Sets UnicodeError("pending buffer overflow") when the combined length
 * would exceed MAXDECPENDING.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers in this file treat a non-zero result as failure. */
820 decoder_append_pending(MultibyteStatefulDecoderContext
*ctx
,
821 MultibyteDecodeBuffer
*buf
)
823 Py_ssize_t npendings
;
/* Number of input bytes left unconsumed. */
825 npendings
= (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf
);
/* NOTE(review): the sum in the first operand is evaluated before the
 * PY_SSIZE_T_MAX guard in the second operand; if overflow is a real
 * concern the guard should be tested first -- confirm. */
826 if (npendings
+ ctx
->pendingsize
> MAXDECPENDING
||
827 npendings
> PY_SSIZE_T_MAX
- ctx
->pendingsize
) {
828 PyErr_SetString(PyExc_UnicodeError
, "pending buffer overflow");
/* Pending data is raw bytes, so the copy length is npendings bytes. */
831 memcpy(ctx
->pending
+ ctx
->pendingsize
, buf
->inbuf
, npendings
);
832 ctx
->pendingsize
+= npendings
;
837 decoder_prepare_buffer(MultibyteDecodeBuffer
*buf
, const char *data
,
840 buf
->inbuf
= buf
->inbuf_top
= (const unsigned char *)data
;
841 buf
->inbuf_end
= buf
->inbuf_top
+ size
;
842 if (buf
->outobj
== NULL
) { /* only if outobj is not allocated yet */
843 buf
->outobj
= PyUnicode_FromUnicode(NULL
, size
);
844 if (buf
->outobj
== NULL
)
846 buf
->outbuf
= PyUnicode_AS_UNICODE(buf
->outobj
);
847 buf
->outbuf_end
= buf
->outbuf
+
848 PyUnicode_GET_SIZE(buf
->outobj
);
855 decoder_feed_buffer(MultibyteStatefulDecoderContext
*ctx
,
856 MultibyteDecodeBuffer
*buf
)
858 while (buf
->inbuf
< buf
->inbuf_end
) {
859 Py_ssize_t inleft
, outleft
;
862 inleft
= (Py_ssize_t
)(buf
->inbuf_end
- buf
->inbuf
);
863 outleft
= (Py_ssize_t
)(buf
->outbuf_end
- buf
->outbuf
);
865 r
= ctx
->codec
->decode(&ctx
->state
, ctx
->codec
->config
,
866 &buf
->inbuf
, inleft
, &buf
->outbuf
, outleft
);
867 if (r
== 0 || r
== MBERR_TOOFEW
)
869 else if (multibytecodec_decerror(ctx
->codec
, &ctx
->state
,
870 buf
, ctx
->errors
, r
))
878 * MultibyteIncrementalEncoder object
882 mbiencoder_encode(MultibyteIncrementalEncoderObject
*self
,
883 PyObject
*args
, PyObject
*kwargs
)
888 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|i:encode",
889 incrementalkwarglist
, &data
, &final
))
892 return encoder_encode_stateful(STATEFUL_ECTX(self
), data
, final
);
/* mbiencoder_reset: reset() method of the incremental encoder. Runs the
 * codec's reset hook on the encoder state (when the codec provides one) and
 * discards any pending, not-yet-encoded input.
 * NOTE(review): this is the *encoder* object, yet the hook consulted and
 * called below is `decreset`. The encode path in this file
 * (multibytecodec_encode) uses `codec->encreset` with an output buffer
 * instead. This looks like a copy/paste slip from the decoder reset --
 * confirm and switch to encreset if so. */
896 mbiencoder_reset(MultibyteIncrementalEncoderObject
*self
)
898 if (self
->codec
->decreset
!= NULL
&&
899 self
->codec
->decreset(&self
->state
, self
->codec
->config
) != 0)
/* Drop buffered input characters carried over from earlier encode() calls. */
901 self
->pendingsize
= 0;
906 static struct PyMethodDef mbiencoder_methods
[] = {
907 {"encode", (PyCFunction
)mbiencoder_encode
,
908 METH_VARARGS
| METH_KEYWORDS
, NULL
},
909 {"reset", (PyCFunction
)mbiencoder_reset
,
915 mbiencoder_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
917 MultibyteIncrementalEncoderObject
*self
;
918 PyObject
*codec
= NULL
;
921 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|s:IncrementalEncoder",
922 incnewkwarglist
, &errors
))
925 self
= (MultibyteIncrementalEncoderObject
*)type
->tp_alloc(type
, 0);
929 codec
= PyObject_GetAttrString((PyObject
*)type
, "codec");
932 if (!MultibyteCodec_Check(codec
)) {
933 PyErr_SetString(PyExc_TypeError
, "codec is unexpected type");
937 self
->codec
= ((MultibyteCodecObject
*)codec
)->codec
;
938 self
->pendingsize
= 0;
939 self
->errors
= internal_error_callback(errors
);
940 if (self
->errors
== NULL
)
942 if (self
->codec
->encinit
!= NULL
&&
943 self
->codec
->encinit(&self
->state
, self
->codec
->config
) != 0)
947 return (PyObject
*)self
;
956 mbiencoder_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
962 mbiencoder_traverse(MultibyteIncrementalEncoderObject
*self
,
963 visitproc visit
, void *arg
)
965 if (ERROR_ISCUSTOM(self
->errors
))
966 Py_VISIT(self
->errors
);
971 mbiencoder_dealloc(MultibyteIncrementalEncoderObject
*self
)
973 PyObject_GC_UnTrack(self
);
974 ERROR_DECREF(self
->errors
);
975 Py_TYPE(self
)->tp_free(self
);
978 static PyTypeObject MultibyteIncrementalEncoder_Type
= {
979 PyVarObject_HEAD_INIT(NULL
, 0)
980 "MultibyteIncrementalEncoder", /* tp_name */
981 sizeof(MultibyteIncrementalEncoderObject
), /* tp_basicsize */
984 (destructor
)mbiencoder_dealloc
, /* tp_dealloc */
990 0, /* tp_as_number */
991 0, /* tp_as_sequence */
992 0, /* tp_as_mapping */
996 PyObject_GenericGetAttr
, /* tp_getattro */
998 0, /* tp_as_buffer */
999 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
1000 | Py_TPFLAGS_BASETYPE
, /* tp_flags */
1002 (traverseproc
)mbiencoder_traverse
, /* tp_traverse */
1004 0, /* tp_richcompare */
1005 0, /* tp_weaklistoffset */
1008 mbiencoder_methods
, /* tp_methods */
1010 codecctx_getsets
, /* tp_getset */
1013 0, /* tp_descr_get */
1014 0, /* tp_descr_set */
1015 0, /* tp_dictoffset */
1016 mbiencoder_init
, /* tp_init */
1018 mbiencoder_new
, /* tp_new */
1023 * MultibyteIncrementalDecoder object
1027 mbidecoder_decode(MultibyteIncrementalDecoderObject
*self
,
1028 PyObject
*args
, PyObject
*kwargs
)
1030 MultibyteDecodeBuffer buf
;
1031 char *data
, *wdata
= NULL
;
1033 Py_ssize_t wsize
, finalsize
= 0, size
, origpending
;
1036 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s*|i:decode",
1037 incrementalkwarglist
, &pdata
, &final
))
1042 buf
.outobj
= buf
.excobj
= NULL
;
1043 origpending
= self
->pendingsize
;
1045 if (self
->pendingsize
== 0) {
1050 if (size
> PY_SSIZE_T_MAX
- self
->pendingsize
) {
1054 wsize
= size
+ self
->pendingsize
;
1055 wdata
= PyMem_Malloc(wsize
);
1058 memcpy(wdata
, self
->pending
, self
->pendingsize
);
1059 memcpy(wdata
+ self
->pendingsize
, data
, size
);
1060 self
->pendingsize
= 0;
1063 if (decoder_prepare_buffer(&buf
, wdata
, wsize
) != 0)
1066 if (decoder_feed_buffer(STATEFUL_DCTX(self
), &buf
))
1069 if (final
&& buf
.inbuf
< buf
.inbuf_end
) {
1070 if (multibytecodec_decerror(self
->codec
, &self
->state
,
1071 &buf
, self
->errors
, MBERR_TOOFEW
)) {
1072 /* recover the original pending buffer */
1073 memcpy(self
->pending
, wdata
, origpending
);
1074 self
->pendingsize
= origpending
;
1079 if (buf
.inbuf
< buf
.inbuf_end
) { /* pending sequence still exists */
1080 if (decoder_append_pending(STATEFUL_DCTX(self
), &buf
) != 0)
1084 finalsize
= (Py_ssize_t
)(buf
.outbuf
- PyUnicode_AS_UNICODE(buf
.outobj
));
1085 if (finalsize
!= PyUnicode_GET_SIZE(buf
.outobj
))
1086 if (PyUnicode_Resize(&buf
.outobj
, finalsize
) == -1)
1089 PyBuffer_Release(&pdata
);
1092 Py_XDECREF(buf
.excobj
);
1096 PyBuffer_Release(&pdata
);
1097 if (wdata
!= NULL
&& wdata
!= data
)
1099 Py_XDECREF(buf
.excobj
);
1100 Py_XDECREF(buf
.outobj
);
/* mbidecoder_reset: reset() method of the incremental decoder. Calls the
 * codec's optional `decreset` hook on the decoder state (a non-zero result
 * takes the failure branch, whose body is not visible in this excerpt) and
 * then discards any pending undecoded bytes. */
1105 mbidecoder_reset(MultibyteIncrementalDecoderObject
*self
)
1107 if (self
->codec
->decreset
!= NULL
&&
1108 self
->codec
->decreset(&self
->state
, self
->codec
->config
) != 0)
/* Forget bytes buffered from a previous partial multibyte sequence. */
1110 self
->pendingsize
= 0;
1115 static struct PyMethodDef mbidecoder_methods
[] = {
1116 {"decode", (PyCFunction
)mbidecoder_decode
,
1117 METH_VARARGS
| METH_KEYWORDS
, NULL
},
1118 {"reset", (PyCFunction
)mbidecoder_reset
,
1124 mbidecoder_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1126 MultibyteIncrementalDecoderObject
*self
;
1127 PyObject
*codec
= NULL
;
1128 char *errors
= NULL
;
1130 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|s:IncrementalDecoder",
1131 incnewkwarglist
, &errors
))
1134 self
= (MultibyteIncrementalDecoderObject
*)type
->tp_alloc(type
, 0);
1138 codec
= PyObject_GetAttrString((PyObject
*)type
, "codec");
1141 if (!MultibyteCodec_Check(codec
)) {
1142 PyErr_SetString(PyExc_TypeError
, "codec is unexpected type");
1146 self
->codec
= ((MultibyteCodecObject
*)codec
)->codec
;
1147 self
->pendingsize
= 0;
1148 self
->errors
= internal_error_callback(errors
);
1149 if (self
->errors
== NULL
)
1151 if (self
->codec
->decinit
!= NULL
&&
1152 self
->codec
->decinit(&self
->state
, self
->codec
->config
) != 0)
1156 return (PyObject
*)self
;
1165 mbidecoder_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1171 mbidecoder_traverse(MultibyteIncrementalDecoderObject
*self
,
1172 visitproc visit
, void *arg
)
1174 if (ERROR_ISCUSTOM(self
->errors
))
1175 Py_VISIT(self
->errors
);
1180 mbidecoder_dealloc(MultibyteIncrementalDecoderObject
*self
)
1182 PyObject_GC_UnTrack(self
);
1183 ERROR_DECREF(self
->errors
);
1184 Py_TYPE(self
)->tp_free(self
);
1187 static PyTypeObject MultibyteIncrementalDecoder_Type
= {
1188 PyVarObject_HEAD_INIT(NULL
, 0)
1189 "MultibyteIncrementalDecoder", /* tp_name */
1190 sizeof(MultibyteIncrementalDecoderObject
), /* tp_basicsize */
1191 0, /* tp_itemsize */
1193 (destructor
)mbidecoder_dealloc
, /* tp_dealloc */
1199 0, /* tp_as_number */
1200 0, /* tp_as_sequence */
1201 0, /* tp_as_mapping */
1205 PyObject_GenericGetAttr
, /* tp_getattro */
1206 0, /* tp_setattro */
1207 0, /* tp_as_buffer */
1208 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
1209 | Py_TPFLAGS_BASETYPE
, /* tp_flags */
1211 (traverseproc
)mbidecoder_traverse
, /* tp_traverse */
1213 0, /* tp_richcompare */
1214 0, /* tp_weaklistoffset */
1217 mbidecoder_methods
, /* tp_methods */
1219 codecctx_getsets
, /* tp_getset */
1222 0, /* tp_descr_get */
1223 0, /* tp_descr_set */
1224 0, /* tp_dictoffset */
1225 mbidecoder_init
, /* tp_init */
1227 mbidecoder_new
, /* tp_new */
1232 * MultibyteStreamReader object
/* mbstreamreader_iread: shared implementation behind the stream reader's
 * read-style methods.
 *   self     - stream reader holding the wrapped stream, codec state and
 *              pending bytes
 *   method   - name of the stream method to call ("read" / "readline" are
 *              the values passed by callers in this file)
 *   sizehint - requested size; negative values take the "read everything"
 *              paths below
 * Reads bytes from the stream, prepends bytes left pending by the previous
 * call, decodes them, and stores any trailing incomplete sequence back into
 * the pending buffer.
 * NOTE(review): several lines (local declarations, loop header, error
 * gotos, final return) are not visible in this excerpt. */
1236 mbstreamreader_iread(MultibyteStreamReaderObject
*self
,
1237 const char *method
, Py_ssize_t sizehint
)
1239 MultibyteDecodeBuffer buf
;
1241 Py_ssize_t rsize
, finalsize
= 0;
/* Early exit producing an empty unicode object.
 * NOTE(review): the guarding condition is not shown -- presumably
 * sizehint == 0; confirm. */
1244 return PyUnicode_FromUnicode(NULL
, 0);
/* Start with no output object and no cached exception so the cleanup
 * code can use Py_XDECREF unconditionally. */
1246 buf
.outobj
= buf
.excobj
= NULL
;
/* Call the stream method without a size argument ... */
1253 cres
= PyObject_CallMethod(self
->stream
,
1254 (char *)method
, NULL
);
/* ... or with the size hint.
 * NOTE(review): `sizehint` is a Py_ssize_t but is passed for an "i"
 * (C int) format unit; where the two types differ in width this is a
 * varargs type mismatch. The "n" unit matches Py_ssize_t -- confirm
 * and adjust. */
1256 cres
= PyObject_CallMethod(self
->stream
,
1257 (char *)method
, "i", sizehint
);
1261 if (!PyString_Check(cres
)) {
1262 PyErr_SetString(PyExc_TypeError
,
1263 "stream function returned a "
1264 "non-string object");
/* An empty read result is treated as end of file. */
1268 endoffile
= (PyString_GET_SIZE(cres
) == 0);
/* Bytes left over from the previous call are prepended to the new data
 * in a freshly allocated string. */
1270 if (self
->pendingsize
> 0) {
/* Guard the size addition below against Py_ssize_t overflow. */
1274 if (PyString_GET_SIZE(cres
) > PY_SSIZE_T_MAX
- self
->pendingsize
) {
1278 rsize
= PyString_GET_SIZE(cres
) + self
->pendingsize
;
1279 ctr
= PyString_FromStringAndSize(NULL
, rsize
);
1282 ctrdata
= PyString_AS_STRING(ctr
);
1283 memcpy(ctrdata
, self
->pending
, self
->pendingsize
);
1284 memcpy(ctrdata
+ self
->pendingsize
,
1285 PyString_AS_STRING(cres
),
1286 PyString_GET_SIZE(cres
));
1289 self
->pendingsize
= 0;
1292 rsize
= PyString_GET_SIZE(cres
);
1293 if (decoder_prepare_buffer(&buf
, PyString_AS_STRING(cres
),
/* Decode only when there is input. */
1297 if (rsize
> 0 && decoder_feed_buffer(
1298 (MultibyteStatefulDecoderContext
*)self
, &buf
))
/* At end of file, or when reading without a size limit, leftover
 * input is reported through the error handler as MBERR_TOOFEW. */
1301 if (endoffile
|| sizehint
< 0) {
1302 if (buf
.inbuf
< buf
.inbuf_end
&&
1303 multibytecodec_decerror(self
->codec
, &self
->state
,
1304 &buf
, self
->errors
, MBERR_TOOFEW
))
1308 if (buf
.inbuf
< buf
.inbuf_end
) { /* pending sequence exists */
1309 if (decoder_append_pending(STATEFUL_DCTX(self
),
/* Number of Py_UNICODE units produced so far. */
1314 finalsize
= (Py_ssize_t
)(buf
.outbuf
-
1315 PyUnicode_AS_UNICODE(buf
.outobj
));
/* Unlimited read, some output produced, or nothing read: take the branch
 * on the following (not shown) line; otherwise retry with a 1-byte read. */
1319 if (sizehint
< 0 || finalsize
!= 0 || rsize
== 0)
1322 sizehint
= 1; /* read 1 more byte and retry */
/* Trim the output object to the number of units actually written. */
1325 if (finalsize
!= PyUnicode_GET_SIZE(buf
.outobj
))
1326 if (PyUnicode_Resize(&buf
.outobj
, finalsize
) == -1)
1330 Py_XDECREF(buf
.excobj
);
/* Second cleanup sequence: releases the cached exception and also the
 * output object. */
1335 Py_XDECREF(buf
.excobj
);
1336 Py_XDECREF(buf
.outobj
);
1341 mbstreamreader_read(MultibyteStreamReaderObject
*self
, PyObject
*args
)
1343 PyObject
*sizeobj
= NULL
;
1346 if (!PyArg_UnpackTuple(args
, "read", 0, 1, &sizeobj
))
1349 if (sizeobj
== Py_None
|| sizeobj
== NULL
)
1351 else if (PyInt_Check(sizeobj
))
1352 size
= PyInt_AsSsize_t(sizeobj
);
1354 PyErr_SetString(PyExc_TypeError
, "arg 1 must be an integer");
1358 return mbstreamreader_iread(self
, "read", size
);
1362 mbstreamreader_readline(MultibyteStreamReaderObject
*self
, PyObject
*args
)
1364 PyObject
*sizeobj
= NULL
;
1367 if (!PyArg_UnpackTuple(args
, "readline", 0, 1, &sizeobj
))
1370 if (sizeobj
== Py_None
|| sizeobj
== NULL
)
1372 else if (PyInt_Check(sizeobj
))
1373 size
= PyInt_AsSsize_t(sizeobj
);
1375 PyErr_SetString(PyExc_TypeError
, "arg 1 must be an integer");
1379 return mbstreamreader_iread(self
, "readline", size
);
1383 mbstreamreader_readlines(MultibyteStreamReaderObject
*self
, PyObject
*args
)
1385 PyObject
*sizehintobj
= NULL
, *r
, *sr
;
1386 Py_ssize_t sizehint
;
1388 if (!PyArg_UnpackTuple(args
, "readlines", 0, 1, &sizehintobj
))
1391 if (sizehintobj
== Py_None
|| sizehintobj
== NULL
)
1393 else if (PyInt_Check(sizehintobj
))
1394 sizehint
= PyInt_AsSsize_t(sizehintobj
);
1396 PyErr_SetString(PyExc_TypeError
, "arg 1 must be an integer");
1400 r
= mbstreamreader_iread(self
, "read", sizehint
);
1404 sr
= PyUnicode_Splitlines(r
, 1);
/* mbstreamreader_reset: reset() method of the stream reader. Calls the
 * codec's optional `decreset` hook on the reader's decoder state (a non-zero
 * result takes the failure branch, whose body is not visible in this
 * excerpt) and then discards any pending undecoded bytes. */
1410 mbstreamreader_reset(MultibyteStreamReaderObject
*self
)
1412 if (self
->codec
->decreset
!= NULL
&&
1413 self
->codec
->decreset(&self
->state
, self
->codec
->config
) != 0)
/* Forget bytes buffered from a previous partial multibyte sequence. */
1415 self
->pendingsize
= 0;
1420 static struct PyMethodDef mbstreamreader_methods
[] = {
1421 {"read", (PyCFunction
)mbstreamreader_read
,
1422 METH_VARARGS
, NULL
},
1423 {"readline", (PyCFunction
)mbstreamreader_readline
,
1424 METH_VARARGS
, NULL
},
1425 {"readlines", (PyCFunction
)mbstreamreader_readlines
,
1426 METH_VARARGS
, NULL
},
1427 {"reset", (PyCFunction
)mbstreamreader_reset
,
1432 static PyMemberDef mbstreamreader_members
[] = {
1433 {"stream", T_OBJECT
,
1434 offsetof(MultibyteStreamReaderObject
, stream
),
1440 mbstreamreader_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1442 MultibyteStreamReaderObject
*self
;
1443 PyObject
*stream
, *codec
= NULL
;
1444 char *errors
= NULL
;
1446 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|s:StreamReader",
1447 streamkwarglist
, &stream
, &errors
))
1450 self
= (MultibyteStreamReaderObject
*)type
->tp_alloc(type
, 0);
1454 codec
= PyObject_GetAttrString((PyObject
*)type
, "codec");
1457 if (!MultibyteCodec_Check(codec
)) {
1458 PyErr_SetString(PyExc_TypeError
, "codec is unexpected type");
1462 self
->codec
= ((MultibyteCodecObject
*)codec
)->codec
;
1463 self
->stream
= stream
;
1465 self
->pendingsize
= 0;
1466 self
->errors
= internal_error_callback(errors
);
1467 if (self
->errors
== NULL
)
1469 if (self
->codec
->decinit
!= NULL
&&
1470 self
->codec
->decinit(&self
->state
, self
->codec
->config
) != 0)
1474 return (PyObject
*)self
;
1483 mbstreamreader_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1489 mbstreamreader_traverse(MultibyteStreamReaderObject
*self
,
1490 visitproc visit
, void *arg
)
1492 if (ERROR_ISCUSTOM(self
->errors
))
1493 Py_VISIT(self
->errors
);
1494 Py_VISIT(self
->stream
);
1499 mbstreamreader_dealloc(MultibyteStreamReaderObject
*self
)
1501 PyObject_GC_UnTrack(self
);
1502 ERROR_DECREF(self
->errors
);
1503 Py_XDECREF(self
->stream
);
1504 Py_TYPE(self
)->tp_free(self
);
/*
 * Type object for "MultibyteStreamReader".
 * Instances are fixed-size (tp_itemsize 0), take part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC with mbstreamreader_traverse) and may be
 * subclassed (Py_TPFLAGS_BASETYPE). Attribute lookup uses
 * PyObject_GenericGetAttr; methods, members and the codecctx_getsets
 * getset table are attached below, and construction goes through
 * mbstreamreader_new / mbstreamreader_init.
 * NOTE(review): several slots are missing from this excerpt, so the
 * initializer shown here is not the complete slot list.
 */
1507 static PyTypeObject MultibyteStreamReader_Type
= {
1508 PyVarObject_HEAD_INIT(NULL
, 0)
1509 "MultibyteStreamReader", /* tp_name */
1510 sizeof(MultibyteStreamReaderObject
), /* tp_basicsize */
1511 0, /* tp_itemsize */
1513 (destructor
)mbstreamreader_dealloc
, /* tp_dealloc */
1519 0, /* tp_as_number */
1520 0, /* tp_as_sequence */
1521 0, /* tp_as_mapping */
1525 PyObject_GenericGetAttr
, /* tp_getattro */
1526 0, /* tp_setattro */
1527 0, /* tp_as_buffer */
1528 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
1529 | Py_TPFLAGS_BASETYPE
, /* tp_flags */
1531 (traverseproc
)mbstreamreader_traverse
, /* tp_traverse */
1533 0, /* tp_richcompare */
1534 0, /* tp_weaklistoffset */
1537 mbstreamreader_methods
, /* tp_methods */
1538 mbstreamreader_members
, /* tp_members */
1539 codecctx_getsets
, /* tp_getset */
1542 0, /* tp_descr_get */
1543 0, /* tp_descr_set */
1544 0, /* tp_dictoffset */
1545 mbstreamreader_init
, /* tp_init */
1547 mbstreamreader_new
, /* tp_new */
1552 * MultibyteStreamWriter object
/*
 * Internal write helper for the stream writer: encodes `unistr` through
 * encoder_encode_stateful() on the writer's encoder context and passes
 * the encoded result to self->stream.write().
 * NOTE(review): the remaining parameters, the error checks and the
 * return value are not visible in this excerpt; callers in this file
 * treat the result as a status code -- confirm the convention.
 */
1556 mbstreamwriter_iwrite(MultibyteStreamWriterObject
*self
,
/* Third argument is 0 -- presumably "not final"; confirm against
 * encoder_encode_stateful. */
1561 str
= encoder_encode_stateful(STATEFUL_ECTX(self
), unistr
, 0);
1565 wr
= PyObject_CallMethod(self
->stream
, "write", "O", str
);
/*
 * Python-level write(strobj): delegates to mbstreamwriter_iwrite().
 * A non-zero result from the helper selects the branch below.
 * NOTE(review): both outcomes' return statements are not visible in this
 * excerpt -- presumably non-zero means failure; confirm.
 */
1575 mbstreamwriter_write(MultibyteStreamWriterObject
*self
, PyObject
*strobj
)
1577 if (mbstreamwriter_iwrite(self
, strobj
))
/*
 * Python-level writelines(lines): writes every item of a sequence via
 * mbstreamwriter_iwrite().
 * Raises TypeError("arg must be a sequence object") when `lines` fails
 * PySequence_Check. The sequence length is deliberately re-queried on
 * every iteration instead of being cached, because the sequence may
 * change size while it is being written.
 * NOTE(review): handling of a failed PySequence_GetItem or iwrite call
 * and the final return value are not visible in this excerpt.
 */
1584 mbstreamwriter_writelines(MultibyteStreamWriterObject
*self
, PyObject
*lines
)
1589 if (!PySequence_Check(lines
)) {
1590 PyErr_SetString(PyExc_TypeError
,
1591 "arg must be a sequence object");
1595 for (i
= 0; i
< PySequence_Length(lines
); i
++) {
1596 /* length can be changed even within this loop */
1597 strobj
= PySequence_GetItem(lines
, i
);
1601 r
= mbstreamwriter_iwrite(self
, strobj
);
/*
 * Python-level reset() for the stream writer.
 * Runs the pending characters through multibytecodec_encode() with
 * MBENC_FLUSH | MBENC_RESET, clears the pending count unconditionally,
 * and, when the encoder produced a non-empty string, writes that string
 * to self->stream.
 * NOTE(review): the NULL check on the encode result, reference
 * releases and the return value are not visible in this excerpt.
 */
1611 mbstreamwriter_reset(MultibyteStreamWriterObject
*self
)
1613 const Py_UNICODE
*pending
;
/* Work on a local copy of the pointer: the encoder takes the input as
 * a pointer-to-pointer. */
1616 pending
= self
->pending
;
1617 pwrt
= multibytecodec_encode(self
->codec
, &self
->state
,
1618 &pending
, self
->pendingsize
, self
->errors
,
1619 MBENC_FLUSH
| MBENC_RESET
);
1620 /* some pending buffer can be truncated when UnicodeEncodeError is
1621 * raised on 'strict' mode. but, 'reset' method is designed to
1622 * reset the pending buffer or states so failed string sequence
1623 * ought to be missed */
1624 self
->pendingsize
= 0;
/* Only touch the underlying stream when there is something to write. */
1628 if (PyString_Size(pwrt
) > 0) {
1630 wr
= PyObject_CallMethod(self
->stream
, "write", "O", pwrt
);
/*
 * tp_new for the multibyte stream writer.
 *
 * Visible steps: parse (stream[, errors]) with the format
 * "O|s:StreamWriter", allocate the instance through type->tp_alloc,
 * fetch the "codec" attribute from the type and require it to pass
 * MultibyteCodec_Check (otherwise TypeError "codec is unexpected type"),
 * then fill in codec, stream, pendingsize and errors and run the codec's
 * optional encinit hook on self->state. Returns the new instance cast to
 * PyObject *.
 *
 * NOTE(review): the failure branches and the reference-count handling
 * for `stream` and `codec` are not visible in this excerpt -- confirm
 * them against the full function before changing anything here.
 */
1642 mbstreamwriter_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1644 MultibyteStreamWriterObject
*self
;
1645 PyObject
*stream
, *codec
= NULL
;
1646 char *errors
= NULL
;
/* `stream` is required; `errors` is an optional C string. */
1648 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|s:StreamWriter",
1649 streamkwarglist
, &stream
, &errors
))
1652 self
= (MultibyteStreamWriterObject
*)type
->tp_alloc(type
, 0);
/* The codec is looked up on the type object, not passed by the caller. */
1656 codec
= PyObject_GetAttrString((PyObject
*)type
, "codec");
1659 if (!MultibyteCodec_Check(codec
)) {
1660 PyErr_SetString(PyExc_TypeError
, "codec is unexpected type");
/* Keep the inner MultibyteCodec pointer taken from the wrapper object. */
1664 self
->codec
= ((MultibyteCodecObject
*)codec
)->codec
;
1665 self
->stream
= stream
;
1667 self
->pendingsize
= 0;
/* A NULL result from internal_error_callback is tested right after. */
1668 self
->errors
= internal_error_callback(errors
);
1669 if (self
->errors
== NULL
)
/* encinit is optional; a non-zero return selects the branch below
 * (its body is not visible here). */
1671 if (self
->codec
->encinit
!= NULL
&&
1672 self
->codec
->encinit(&self
->state
, self
->codec
->config
) != 0)
1676 return (PyObject
*)self
;
/*
 * tp_init slot of MultibyteStreamWriter_Type, taking the usual
 * (self, args, kwds) triple.
 * NOTE(review): the body is not visible in this excerpt; the visible
 * construction work is done in mbstreamwriter_new -- confirm what, if
 * anything, happens here.
 */
1685 mbstreamwriter_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
/*
 * GC traversal (tp_traverse) for the stream writer.
 * Visits self->errors only when ERROR_ISCUSTOM() reports it as a custom
 * handler, then always visits self->stream.
 */
1691 mbstreamwriter_traverse(MultibyteStreamWriterObject
*self
,
1692 visitproc visit
, void *arg
)
1694 if (ERROR_ISCUSTOM(self
->errors
))
1695 Py_VISIT(self
->errors
);
1696 Py_VISIT(self
->stream
);
/*
 * Destructor (tp_dealloc) for the stream writer.
 * Untracks the object from the GC first, releases the error handler via
 * ERROR_DECREF and the stream via Py_XDECREF (so a NULL stream is
 * tolerated), then returns the memory through the type's tp_free.
 */
1701 mbstreamwriter_dealloc(MultibyteStreamWriterObject
*self
)
1703 PyObject_GC_UnTrack(self
);
1704 ERROR_DECREF(self
->errors
);
1705 Py_XDECREF(self
->stream
);
1706 Py_TYPE(self
)->tp_free(self
);
/*
 * Method table for the stream writer type. It maps the Python-visible
 * names "write", "writelines" and "reset" onto their C implementations.
 * NOTE(review): the calling-convention flags, doc slots and table
 * terminator are not visible in this excerpt.
 */
1709 static struct PyMethodDef mbstreamwriter_methods
[] = {
1710 {"write", (PyCFunction
)mbstreamwriter_write
,
1712 {"writelines", (PyCFunction
)mbstreamwriter_writelines
,
1714 {"reset", (PyCFunction
)mbstreamwriter_reset
,
/*
 * Member table for the stream writer type: exposes the `stream` field of
 * MultibyteStreamWriterObject as the object-typed (T_OBJECT) attribute
 * "stream".
 * NOTE(review): the access flags and doc slot of this entry are not
 * visible in this excerpt.
 */
1719 static PyMemberDef mbstreamwriter_members
[] = {
1720 {"stream", T_OBJECT
,
1721 offsetof(MultibyteStreamWriterObject
, stream
),
/*
 * Type object for "MultibyteStreamWriter".
 * Instances are fixed-size (tp_itemsize 0), take part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC with mbstreamwriter_traverse) and may be
 * subclassed (Py_TPFLAGS_BASETYPE). Attribute lookup uses
 * PyObject_GenericGetAttr; methods, members and the codecctx_getsets
 * getset table are attached below, and construction goes through
 * mbstreamwriter_new / mbstreamwriter_init.
 * NOTE(review): several slots are missing from this excerpt, so the
 * initializer shown here is not the complete slot list.
 */
1726 static PyTypeObject MultibyteStreamWriter_Type
= {
1727 PyVarObject_HEAD_INIT(NULL
, 0)
1728 "MultibyteStreamWriter", /* tp_name */
1729 sizeof(MultibyteStreamWriterObject
), /* tp_basicsize */
1730 0, /* tp_itemsize */
1732 (destructor
)mbstreamwriter_dealloc
, /* tp_dealloc */
1738 0, /* tp_as_number */
1739 0, /* tp_as_sequence */
1740 0, /* tp_as_mapping */
1744 PyObject_GenericGetAttr
, /* tp_getattro */
1745 0, /* tp_setattro */
1746 0, /* tp_as_buffer */
1747 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
1748 | Py_TPFLAGS_BASETYPE
, /* tp_flags */
1750 (traverseproc
)mbstreamwriter_traverse
, /* tp_traverse */
1752 0, /* tp_richcompare */
1753 0, /* tp_weaklistoffset */
1756 mbstreamwriter_methods
, /* tp_methods */
1757 mbstreamwriter_members
, /* tp_members */
1758 codecctx_getsets
, /* tp_getset */
1761 0, /* tp_descr_get */
1762 0, /* tp_descr_set */
1763 0, /* tp_dictoffset */
1764 mbstreamwriter_init
, /* tp_init */
1766 mbstreamwriter_new
, /* tp_new */
1771 * Exposed factory function
/*
 * Module-level factory `__create_codec(arg)`.
 * `arg` must be a capsule named PyMultibyteCodec_CAPSULE_NAME; anything
 * else sets ValueError("argument type invalid"). The MultibyteCodec
 * pointer is taken out of the capsule, its optional codecinit hook is run
 * on codec->config, and a new MultibyteCodecObject wrapping that pointer
 * is returned. The first parameter (`ignore`) is not used in the visible
 * code.
 * NOTE(review): the failure branches (bad capsule, codecinit failure,
 * failed PyObject_New) are not visible in this excerpt.
 */
1775 __create_codec(PyObject
*ignore
, PyObject
*arg
)
1777 MultibyteCodecObject
*self
;
1778 MultibyteCodec
*codec
;
1780 if (!PyCapsule_IsValid(arg
, PyMultibyteCodec_CAPSULE_NAME
)) {
1781 PyErr_SetString(PyExc_ValueError
, "argument type invalid");
1785 codec
= PyCapsule_GetPointer(arg
, PyMultibyteCodec_CAPSULE_NAME
);
/* codecinit is optional; a non-zero return selects the branch below
 * (its body is not visible here). */
1786 if (codec
->codecinit
!= NULL
&& codec
->codecinit(codec
->config
) != 0)
1789 self
= PyObject_New(MultibyteCodecObject
, &MultibyteCodec_Type
);
1792 self
->codec
= codec
;
1794 return (PyObject
*)self
;
/*
 * Module-level function table: registers __create_codec as a
 * single-argument (METH_O) function.
 * NOTE(review): the table terminator is not visible in this excerpt.
 */
1797 static struct PyMethodDef __methods
[] = {
1798 {"__create_codec", (PyCFunction
)__create_codec
, METH_O
},
/*
 * Module initializer for "_multibytecodec".
 * Readies MultibyteCodec_Type, creates the module with the __methods
 * table, then readies each type in `typelist` and adds it to the module
 * under its tp_name. Any error still pending at the end is escalated to
 * Py_FatalError.
 * NOTE(review): the return value of PyModule_AddObject is not checked in
 * the visible code; a failure there is only caught by the final
 * PyErr_Occurred() test. The early-exit branches after the PyType_Ready
 * and Py_InitModule calls are not visible in this excerpt.
 */
1803 init_multibytecodec(void)
/* The loop below stops at a NULL entry, so this list is expected to be
 * NULL-terminated (the terminator line is not visible here). */
1807 PyTypeObject
*typelist
[] = {
1808 &MultibyteIncrementalEncoder_Type
,
1809 &MultibyteIncrementalDecoder_Type
,
1810 &MultibyteStreamReader_Type
,
1811 &MultibyteStreamWriter_Type
,
1815 if (PyType_Ready(&MultibyteCodec_Type
) < 0)
1818 m
= Py_InitModule("_multibytecodec", __methods
);
1822 for (i
= 0; typelist
[i
] != NULL
; i
++) {
1823 if (PyType_Ready(typelist
[i
]) < 0)
/* Take a reference for the module: PyModule_AddObject steals one. */
1825 Py_INCREF(typelist
[i
]);
1826 PyModule_AddObject(m
, typelist
[i
]->tp_name
,
1827 (PyObject
*)typelist
[i
]);
1830 if (PyErr_Occurred())
1831 Py_FatalError("can't initialize the _multibytecodec module");