]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | /*\r |
2 | * multibytecodec.c: Common Multibyte Codec Implementation\r | |
3 | *\r | |
4 | * Written by Hye-Shik Chang <perky@FreeBSD.org>\r | |
5 | */\r | |
6 | \r | |
7 | #define PY_SSIZE_T_CLEAN\r | |
8 | #include "Python.h"\r | |
9 | #include "structmember.h"\r | |
10 | #include "multibytecodec.h"\r | |
11 | \r | |
12 | typedef struct {\r | |
13 | const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;\r | |
14 | unsigned char *outbuf, *outbuf_end;\r | |
15 | PyObject *excobj, *outobj;\r | |
16 | } MultibyteEncodeBuffer;\r | |
17 | \r | |
18 | typedef struct {\r | |
19 | const unsigned char *inbuf, *inbuf_top, *inbuf_end;\r | |
20 | Py_UNICODE *outbuf, *outbuf_end;\r | |
21 | PyObject *excobj, *outobj;\r | |
22 | } MultibyteDecodeBuffer;\r | |
23 | \r | |
24 | PyDoc_STRVAR(MultibyteCodec_Encode__doc__,\r | |
25 | "I.encode(unicode[, errors]) -> (string, length consumed)\n\\r | |
26 | \n\\r | |
27 | Return an encoded string version of `unicode'. errors may be given to\n\\r | |
28 | set a different error handling scheme. Default is 'strict' meaning that\n\\r | |
29 | encoding errors raise a UnicodeEncodeError. Other possible values are\n\\r | |
30 | 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\\r | |
31 | registered with codecs.register_error that can handle UnicodeEncodeErrors.");\r | |
32 | \r | |
33 | PyDoc_STRVAR(MultibyteCodec_Decode__doc__,\r | |
34 | "I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\\r | |
35 | \n\\r | |
36 | Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\\r | |
37 | to set a different error handling scheme. Default is 'strict' meaning\n\\r | |
38 | that encoding errors raise a UnicodeDecodeError. Other possible values\n\\r | |
39 | are 'ignore' and 'replace' as well as any other name registered with\n\\r | |
40 | codecs.register_error that is able to handle UnicodeDecodeErrors.");\r | |
41 | \r | |
42 | static char *codeckwarglist[] = {"input", "errors", NULL};\r | |
43 | static char *incnewkwarglist[] = {"errors", NULL};\r | |
44 | static char *incrementalkwarglist[] = {"input", "final", NULL};\r | |
45 | static char *streamkwarglist[] = {"stream", "errors", NULL};\r | |
46 | \r | |
47 | static PyObject *multibytecodec_encode(MultibyteCodec *,\r | |
48 | MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,\r | |
49 | PyObject *, int);\r | |
50 | \r | |
/* Encoder flag: reset after an encoding session.  The expansion is
 * parenthesized so that operators at the point of use cannot regroup
 * the shift. */
#define MBENC_RESET     (MBENC_MAX<<1)
52 | \r | |
53 | static PyObject *\r | |
54 | make_tuple(PyObject *object, Py_ssize_t len)\r | |
55 | {\r | |
56 | PyObject *v, *w;\r | |
57 | \r | |
58 | if (object == NULL)\r | |
59 | return NULL;\r | |
60 | \r | |
61 | v = PyTuple_New(2);\r | |
62 | if (v == NULL) {\r | |
63 | Py_DECREF(object);\r | |
64 | return NULL;\r | |
65 | }\r | |
66 | PyTuple_SET_ITEM(v, 0, object);\r | |
67 | \r | |
68 | w = PyInt_FromSsize_t(len);\r | |
69 | if (w == NULL) {\r | |
70 | Py_DECREF(v);\r | |
71 | return NULL;\r | |
72 | }\r | |
73 | PyTuple_SET_ITEM(v, 1, w);\r | |
74 | \r | |
75 | return v;\r | |
76 | }\r | |
77 | \r | |
78 | static PyObject *\r | |
79 | internal_error_callback(const char *errors)\r | |
80 | {\r | |
81 | if (errors == NULL || strcmp(errors, "strict") == 0)\r | |
82 | return ERROR_STRICT;\r | |
83 | else if (strcmp(errors, "ignore") == 0)\r | |
84 | return ERROR_IGNORE;\r | |
85 | else if (strcmp(errors, "replace") == 0)\r | |
86 | return ERROR_REPLACE;\r | |
87 | else\r | |
88 | return PyString_FromString(errors);\r | |
89 | }\r | |
90 | \r | |
91 | static PyObject *\r | |
92 | call_error_callback(PyObject *errors, PyObject *exc)\r | |
93 | {\r | |
94 | PyObject *args, *cb, *r;\r | |
95 | \r | |
96 | assert(PyString_Check(errors));\r | |
97 | cb = PyCodec_LookupError(PyString_AS_STRING(errors));\r | |
98 | if (cb == NULL)\r | |
99 | return NULL;\r | |
100 | \r | |
101 | args = PyTuple_New(1);\r | |
102 | if (args == NULL) {\r | |
103 | Py_DECREF(cb);\r | |
104 | return NULL;\r | |
105 | }\r | |
106 | \r | |
107 | PyTuple_SET_ITEM(args, 0, exc);\r | |
108 | Py_INCREF(exc);\r | |
109 | \r | |
110 | r = PyObject_CallObject(cb, args);\r | |
111 | Py_DECREF(args);\r | |
112 | Py_DECREF(cb);\r | |
113 | return r;\r | |
114 | }\r | |
115 | \r | |
116 | static PyObject *\r | |
117 | codecctx_errors_get(MultibyteStatefulCodecContext *self)\r | |
118 | {\r | |
119 | const char *errors;\r | |
120 | \r | |
121 | if (self->errors == ERROR_STRICT)\r | |
122 | errors = "strict";\r | |
123 | else if (self->errors == ERROR_IGNORE)\r | |
124 | errors = "ignore";\r | |
125 | else if (self->errors == ERROR_REPLACE)\r | |
126 | errors = "replace";\r | |
127 | else {\r | |
128 | Py_INCREF(self->errors);\r | |
129 | return self->errors;\r | |
130 | }\r | |
131 | \r | |
132 | return PyString_FromString(errors);\r | |
133 | }\r | |
134 | \r | |
135 | static int\r | |
136 | codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,\r | |
137 | void *closure)\r | |
138 | {\r | |
139 | PyObject *cb;\r | |
140 | \r | |
141 | if (!PyString_Check(value)) {\r | |
142 | PyErr_SetString(PyExc_TypeError, "errors must be a string");\r | |
143 | return -1;\r | |
144 | }\r | |
145 | \r | |
146 | cb = internal_error_callback(PyString_AS_STRING(value));\r | |
147 | if (cb == NULL)\r | |
148 | return -1;\r | |
149 | \r | |
150 | ERROR_DECREF(self->errors);\r | |
151 | self->errors = cb;\r | |
152 | return 0;\r | |
153 | }\r | |
154 | \r | |
155 | /* This getset handlers list is used by all the stateful codec objects */\r | |
156 | static PyGetSetDef codecctx_getsets[] = {\r | |
157 | {"errors", (getter)codecctx_errors_get,\r | |
158 | (setter)codecctx_errors_set,\r | |
159 | PyDoc_STR("how to treat errors")},\r | |
160 | {NULL,}\r | |
161 | };\r | |
162 | \r | |
163 | static int\r | |
164 | expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)\r | |
165 | {\r | |
166 | Py_ssize_t orgpos, orgsize, incsize;\r | |
167 | \r | |
168 | orgpos = (Py_ssize_t)((char *)buf->outbuf -\r | |
169 | PyString_AS_STRING(buf->outobj));\r | |
170 | orgsize = PyString_GET_SIZE(buf->outobj);\r | |
171 | incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);\r | |
172 | \r | |
173 | if (orgsize > PY_SSIZE_T_MAX - incsize)\r | |
174 | return -1;\r | |
175 | \r | |
176 | if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1)\r | |
177 | return -1;\r | |
178 | \r | |
179 | buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;\r | |
180 | buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)\r | |
181 | + PyString_GET_SIZE(buf->outobj);\r | |
182 | \r | |
183 | return 0;\r | |
184 | }\r | |
/* Make sure at least `s' more bytes fit into the encode buffer, growing
 * it when they do not; a value of `s' below 1 always forces a growth.
 * Jumps to the caller's `errorexit' label when the growth fails.
 * Wrapped in do/while(0) so the macro behaves as a single statement. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)             \
        if (expand_encodebuffer((buf), (s)) == -1)                      \
            goto errorexit;                                             \
} while (0)
190 | \r | |
191 | static int\r | |
192 | expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)\r | |
193 | {\r | |
194 | Py_ssize_t orgpos, orgsize;\r | |
195 | \r | |
196 | orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));\r | |
197 | orgsize = PyUnicode_GET_SIZE(buf->outobj);\r | |
198 | if (PyUnicode_Resize(&buf->outobj, orgsize + (\r | |
199 | esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)\r | |
200 | return -1;\r | |
201 | \r | |
202 | buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;\r | |
203 | buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)\r | |
204 | + PyUnicode_GET_SIZE(buf->outobj);\r | |
205 | \r | |
206 | return 0;\r | |
207 | }\r | |
/* Make sure at least `s' more characters fit into the decode buffer,
 * growing it when they do not; a value of `s' below 1 always forces a
 * growth.  Jumps to the caller's `errorexit' label when the growth
 * fails.  Wrapped in do/while(0) so the macro behaves as a single
 * statement. */
#define REQUIRE_DECODEBUFFER(buf, s) do {                               \
    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)             \
        if (expand_decodebuffer((buf), (s)) == -1)                      \
            goto errorexit;                                             \
} while (0)
213 | \r | |
214 | \r | |
215 | /**\r | |
216 | * MultibyteCodec object\r | |
217 | */\r | |
218 | \r | |
219 | static int\r | |
220 | multibytecodec_encerror(MultibyteCodec *codec,\r | |
221 | MultibyteCodec_State *state,\r | |
222 | MultibyteEncodeBuffer *buf,\r | |
223 | PyObject *errors, Py_ssize_t e)\r | |
224 | {\r | |
225 | PyObject *retobj = NULL, *retstr = NULL, *tobj;\r | |
226 | Py_ssize_t retstrsize, newpos;\r | |
227 | Py_ssize_t esize, start, end;\r | |
228 | const char *reason;\r | |
229 | \r | |
230 | if (e > 0) {\r | |
231 | reason = "illegal multibyte sequence";\r | |
232 | esize = e;\r | |
233 | }\r | |
234 | else {\r | |
235 | switch (e) {\r | |
236 | case MBERR_TOOSMALL:\r | |
237 | REQUIRE_ENCODEBUFFER(buf, -1);\r | |
238 | return 0; /* retry it */\r | |
239 | case MBERR_TOOFEW:\r | |
240 | reason = "incomplete multibyte sequence";\r | |
241 | esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);\r | |
242 | break;\r | |
243 | case MBERR_INTERNAL:\r | |
244 | PyErr_SetString(PyExc_RuntimeError,\r | |
245 | "internal codec error");\r | |
246 | return -1;\r | |
247 | default:\r | |
248 | PyErr_SetString(PyExc_RuntimeError,\r | |
249 | "unknown runtime error");\r | |
250 | return -1;\r | |
251 | }\r | |
252 | }\r | |
253 | \r | |
254 | if (errors == ERROR_REPLACE) {\r | |
255 | const Py_UNICODE replchar = '?', *inbuf = &replchar;\r | |
256 | Py_ssize_t r;\r | |
257 | \r | |
258 | for (;;) {\r | |
259 | Py_ssize_t outleft;\r | |
260 | \r | |
261 | outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);\r | |
262 | r = codec->encode(state, codec->config, &inbuf, 1,\r | |
263 | &buf->outbuf, outleft, 0);\r | |
264 | if (r == MBERR_TOOSMALL) {\r | |
265 | REQUIRE_ENCODEBUFFER(buf, -1);\r | |
266 | continue;\r | |
267 | }\r | |
268 | else\r | |
269 | break;\r | |
270 | }\r | |
271 | \r | |
272 | if (r != 0) {\r | |
273 | REQUIRE_ENCODEBUFFER(buf, 1);\r | |
274 | *buf->outbuf++ = '?';\r | |
275 | }\r | |
276 | }\r | |
277 | if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {\r | |
278 | buf->inbuf += esize;\r | |
279 | return 0;\r | |
280 | }\r | |
281 | \r | |
282 | start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);\r | |
283 | end = start + esize;\r | |
284 | \r | |
285 | /* use cached exception object if available */\r | |
286 | if (buf->excobj == NULL) {\r | |
287 | buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,\r | |
288 | buf->inbuf_top,\r | |
289 | buf->inbuf_end - buf->inbuf_top,\r | |
290 | start, end, reason);\r | |
291 | if (buf->excobj == NULL)\r | |
292 | goto errorexit;\r | |
293 | }\r | |
294 | else\r | |
295 | if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||\r | |
296 | PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||\r | |
297 | PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)\r | |
298 | goto errorexit;\r | |
299 | \r | |
300 | if (errors == ERROR_STRICT) {\r | |
301 | PyCodec_StrictErrors(buf->excobj);\r | |
302 | goto errorexit;\r | |
303 | }\r | |
304 | \r | |
305 | retobj = call_error_callback(errors, buf->excobj);\r | |
306 | if (retobj == NULL)\r | |
307 | goto errorexit;\r | |
308 | \r | |
309 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||\r | |
310 | !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||\r | |
311 | !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) ||\r | |
312 | PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) {\r | |
313 | PyErr_SetString(PyExc_TypeError,\r | |
314 | "encoding error handler must return "\r | |
315 | "(unicode, int) tuple");\r | |
316 | goto errorexit;\r | |
317 | }\r | |
318 | \r | |
319 | {\r | |
320 | const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);\r | |
321 | \r | |
322 | retstr = multibytecodec_encode(codec, state, &uraw,\r | |
323 | PyUnicode_GET_SIZE(tobj), ERROR_STRICT,\r | |
324 | MBENC_FLUSH);\r | |
325 | if (retstr == NULL)\r | |
326 | goto errorexit;\r | |
327 | }\r | |
328 | \r | |
329 | retstrsize = PyString_GET_SIZE(retstr);\r | |
330 | REQUIRE_ENCODEBUFFER(buf, retstrsize);\r | |
331 | \r | |
332 | memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);\r | |
333 | buf->outbuf += retstrsize;\r | |
334 | \r | |
335 | newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));\r | |
336 | if (newpos < 0 && !PyErr_Occurred())\r | |
337 | newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);\r | |
338 | if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {\r | |
339 | PyErr_Clear();\r | |
340 | PyErr_Format(PyExc_IndexError,\r | |
341 | "position %zd from error handler out of bounds",\r | |
342 | newpos);\r | |
343 | goto errorexit;\r | |
344 | }\r | |
345 | buf->inbuf = buf->inbuf_top + newpos;\r | |
346 | \r | |
347 | Py_DECREF(retobj);\r | |
348 | Py_DECREF(retstr);\r | |
349 | return 0;\r | |
350 | \r | |
351 | errorexit:\r | |
352 | Py_XDECREF(retobj);\r | |
353 | Py_XDECREF(retstr);\r | |
354 | return -1;\r | |
355 | }\r | |
356 | \r | |
357 | static int\r | |
358 | multibytecodec_decerror(MultibyteCodec *codec,\r | |
359 | MultibyteCodec_State *state,\r | |
360 | MultibyteDecodeBuffer *buf,\r | |
361 | PyObject *errors, Py_ssize_t e)\r | |
362 | {\r | |
363 | PyObject *retobj = NULL, *retuni = NULL;\r | |
364 | Py_ssize_t retunisize, newpos;\r | |
365 | const char *reason;\r | |
366 | Py_ssize_t esize, start, end;\r | |
367 | \r | |
368 | if (e > 0) {\r | |
369 | reason = "illegal multibyte sequence";\r | |
370 | esize = e;\r | |
371 | }\r | |
372 | else {\r | |
373 | switch (e) {\r | |
374 | case MBERR_TOOSMALL:\r | |
375 | REQUIRE_DECODEBUFFER(buf, -1);\r | |
376 | return 0; /* retry it */\r | |
377 | case MBERR_TOOFEW:\r | |
378 | reason = "incomplete multibyte sequence";\r | |
379 | esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);\r | |
380 | break;\r | |
381 | case MBERR_INTERNAL:\r | |
382 | PyErr_SetString(PyExc_RuntimeError,\r | |
383 | "internal codec error");\r | |
384 | return -1;\r | |
385 | default:\r | |
386 | PyErr_SetString(PyExc_RuntimeError,\r | |
387 | "unknown runtime error");\r | |
388 | return -1;\r | |
389 | }\r | |
390 | }\r | |
391 | \r | |
392 | if (errors == ERROR_REPLACE) {\r | |
393 | REQUIRE_DECODEBUFFER(buf, 1);\r | |
394 | *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;\r | |
395 | }\r | |
396 | if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {\r | |
397 | buf->inbuf += esize;\r | |
398 | return 0;\r | |
399 | }\r | |
400 | \r | |
401 | start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);\r | |
402 | end = start + esize;\r | |
403 | \r | |
404 | /* use cached exception object if available */\r | |
405 | if (buf->excobj == NULL) {\r | |
406 | buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,\r | |
407 | (const char *)buf->inbuf_top,\r | |
408 | (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),\r | |
409 | start, end, reason);\r | |
410 | if (buf->excobj == NULL)\r | |
411 | goto errorexit;\r | |
412 | }\r | |
413 | else\r | |
414 | if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||\r | |
415 | PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||\r | |
416 | PyUnicodeDecodeError_SetReason(buf->excobj, reason))\r | |
417 | goto errorexit;\r | |
418 | \r | |
419 | if (errors == ERROR_STRICT) {\r | |
420 | PyCodec_StrictErrors(buf->excobj);\r | |
421 | goto errorexit;\r | |
422 | }\r | |
423 | \r | |
424 | retobj = call_error_callback(errors, buf->excobj);\r | |
425 | if (retobj == NULL)\r | |
426 | goto errorexit;\r | |
427 | \r | |
428 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||\r | |
429 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||\r | |
430 | !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) ||\r | |
431 | PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) {\r | |
432 | PyErr_SetString(PyExc_TypeError,\r | |
433 | "decoding error handler must return "\r | |
434 | "(unicode, int) tuple");\r | |
435 | goto errorexit;\r | |
436 | }\r | |
437 | \r | |
438 | retunisize = PyUnicode_GET_SIZE(retuni);\r | |
439 | if (retunisize > 0) {\r | |
440 | REQUIRE_DECODEBUFFER(buf, retunisize);\r | |
441 | memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),\r | |
442 | retunisize * Py_UNICODE_SIZE);\r | |
443 | buf->outbuf += retunisize;\r | |
444 | }\r | |
445 | \r | |
446 | newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));\r | |
447 | if (newpos < 0 && !PyErr_Occurred())\r | |
448 | newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);\r | |
449 | if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {\r | |
450 | PyErr_Clear();\r | |
451 | PyErr_Format(PyExc_IndexError,\r | |
452 | "position %zd from error handler out of bounds",\r | |
453 | newpos);\r | |
454 | goto errorexit;\r | |
455 | }\r | |
456 | buf->inbuf = buf->inbuf_top + newpos;\r | |
457 | Py_DECREF(retobj);\r | |
458 | return 0;\r | |
459 | \r | |
460 | errorexit:\r | |
461 | Py_XDECREF(retobj);\r | |
462 | return -1;\r | |
463 | }\r | |
464 | \r | |
465 | static PyObject *\r | |
466 | multibytecodec_encode(MultibyteCodec *codec,\r | |
467 | MultibyteCodec_State *state,\r | |
468 | const Py_UNICODE **data, Py_ssize_t datalen,\r | |
469 | PyObject *errors, int flags)\r | |
470 | {\r | |
471 | MultibyteEncodeBuffer buf;\r | |
472 | Py_ssize_t finalsize, r = 0;\r | |
473 | \r | |
474 | if (datalen == 0 && !(flags & MBENC_RESET))\r | |
475 | return PyString_FromString("");\r | |
476 | \r | |
477 | buf.excobj = NULL;\r | |
478 | buf.inbuf = buf.inbuf_top = *data;\r | |
479 | buf.inbuf_end = buf.inbuf_top + datalen;\r | |
480 | \r | |
481 | if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {\r | |
482 | PyErr_NoMemory();\r | |
483 | goto errorexit;\r | |
484 | }\r | |
485 | \r | |
486 | buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);\r | |
487 | if (buf.outobj == NULL)\r | |
488 | goto errorexit;\r | |
489 | buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);\r | |
490 | buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);\r | |
491 | \r | |
492 | while (buf.inbuf < buf.inbuf_end) {\r | |
493 | Py_ssize_t inleft, outleft;\r | |
494 | \r | |
495 | /* we don't reuse inleft and outleft here.\r | |
496 | * error callbacks can relocate the cursor anywhere on buffer*/\r | |
497 | inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);\r | |
498 | outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);\r | |
499 | r = codec->encode(state, codec->config, &buf.inbuf, inleft,\r | |
500 | &buf.outbuf, outleft, flags);\r | |
501 | if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))\r | |
502 | break;\r | |
503 | else if (multibytecodec_encerror(codec, state, &buf, errors,r))\r | |
504 | goto errorexit;\r | |
505 | else if (r == MBERR_TOOFEW)\r | |
506 | break;\r | |
507 | }\r | |
508 | \r | |
509 | if (codec->encreset != NULL && (flags & MBENC_RESET))\r | |
510 | for (;;) {\r | |
511 | Py_ssize_t outleft;\r | |
512 | \r | |
513 | outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);\r | |
514 | r = codec->encreset(state, codec->config, &buf.outbuf,\r | |
515 | outleft);\r | |
516 | if (r == 0)\r | |
517 | break;\r | |
518 | else if (multibytecodec_encerror(codec, state,\r | |
519 | &buf, errors, r))\r | |
520 | goto errorexit;\r | |
521 | }\r | |
522 | \r | |
523 | finalsize = (Py_ssize_t)((char *)buf.outbuf -\r | |
524 | PyString_AS_STRING(buf.outobj));\r | |
525 | \r | |
526 | if (finalsize != PyString_GET_SIZE(buf.outobj))\r | |
527 | if (_PyString_Resize(&buf.outobj, finalsize) == -1)\r | |
528 | goto errorexit;\r | |
529 | \r | |
530 | *data = buf.inbuf;\r | |
531 | Py_XDECREF(buf.excobj);\r | |
532 | return buf.outobj;\r | |
533 | \r | |
534 | errorexit:\r | |
535 | Py_XDECREF(buf.excobj);\r | |
536 | Py_XDECREF(buf.outobj);\r | |
537 | return NULL;\r | |
538 | }\r | |
539 | \r | |
540 | static PyObject *\r | |
541 | MultibyteCodec_Encode(MultibyteCodecObject *self,\r | |
542 | PyObject *args, PyObject *kwargs)\r | |
543 | {\r | |
544 | MultibyteCodec_State state;\r | |
545 | Py_UNICODE *data;\r | |
546 | PyObject *errorcb, *r, *arg, *ucvt;\r | |
547 | const char *errors = NULL;\r | |
548 | Py_ssize_t datalen;\r | |
549 | \r | |
550 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",\r | |
551 | codeckwarglist, &arg, &errors))\r | |
552 | return NULL;\r | |
553 | \r | |
554 | if (PyUnicode_Check(arg))\r | |
555 | ucvt = NULL;\r | |
556 | else {\r | |
557 | arg = ucvt = PyObject_Unicode(arg);\r | |
558 | if (arg == NULL)\r | |
559 | return NULL;\r | |
560 | else if (!PyUnicode_Check(arg)) {\r | |
561 | PyErr_SetString(PyExc_TypeError,\r | |
562 | "couldn't convert the object to unicode.");\r | |
563 | Py_DECREF(ucvt);\r | |
564 | return NULL;\r | |
565 | }\r | |
566 | }\r | |
567 | \r | |
568 | data = PyUnicode_AS_UNICODE(arg);\r | |
569 | datalen = PyUnicode_GET_SIZE(arg);\r | |
570 | \r | |
571 | errorcb = internal_error_callback(errors);\r | |
572 | if (errorcb == NULL) {\r | |
573 | Py_XDECREF(ucvt);\r | |
574 | return NULL;\r | |
575 | }\r | |
576 | \r | |
577 | if (self->codec->encinit != NULL &&\r | |
578 | self->codec->encinit(&state, self->codec->config) != 0)\r | |
579 | goto errorexit;\r | |
580 | r = multibytecodec_encode(self->codec, &state,\r | |
581 | (const Py_UNICODE **)&data, datalen, errorcb,\r | |
582 | MBENC_FLUSH | MBENC_RESET);\r | |
583 | if (r == NULL)\r | |
584 | goto errorexit;\r | |
585 | \r | |
586 | ERROR_DECREF(errorcb);\r | |
587 | Py_XDECREF(ucvt);\r | |
588 | return make_tuple(r, datalen);\r | |
589 | \r | |
590 | errorexit:\r | |
591 | ERROR_DECREF(errorcb);\r | |
592 | Py_XDECREF(ucvt);\r | |
593 | return NULL;\r | |
594 | }\r | |
595 | \r | |
596 | static PyObject *\r | |
597 | MultibyteCodec_Decode(MultibyteCodecObject *self,\r | |
598 | PyObject *args, PyObject *kwargs)\r | |
599 | {\r | |
600 | MultibyteCodec_State state;\r | |
601 | MultibyteDecodeBuffer buf;\r | |
602 | PyObject *errorcb;\r | |
603 | Py_buffer pdata;\r | |
604 | const char *data, *errors = NULL;\r | |
605 | Py_ssize_t datalen, finalsize;\r | |
606 | \r | |
607 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode",\r | |
608 | codeckwarglist, &pdata, &errors))\r | |
609 | return NULL;\r | |
610 | data = pdata.buf;\r | |
611 | datalen = pdata.len;\r | |
612 | \r | |
613 | errorcb = internal_error_callback(errors);\r | |
614 | if (errorcb == NULL) {\r | |
615 | PyBuffer_Release(&pdata);\r | |
616 | return NULL;\r | |
617 | }\r | |
618 | \r | |
619 | if (datalen == 0) {\r | |
620 | PyBuffer_Release(&pdata);\r | |
621 | ERROR_DECREF(errorcb);\r | |
622 | return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);\r | |
623 | }\r | |
624 | \r | |
625 | buf.excobj = NULL;\r | |
626 | buf.inbuf = buf.inbuf_top = (unsigned char *)data;\r | |
627 | buf.inbuf_end = buf.inbuf_top + datalen;\r | |
628 | buf.outobj = PyUnicode_FromUnicode(NULL, datalen);\r | |
629 | if (buf.outobj == NULL)\r | |
630 | goto errorexit;\r | |
631 | buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);\r | |
632 | buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);\r | |
633 | \r | |
634 | if (self->codec->decinit != NULL &&\r | |
635 | self->codec->decinit(&state, self->codec->config) != 0)\r | |
636 | goto errorexit;\r | |
637 | \r | |
638 | while (buf.inbuf < buf.inbuf_end) {\r | |
639 | Py_ssize_t inleft, outleft, r;\r | |
640 | \r | |
641 | inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);\r | |
642 | outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);\r | |
643 | \r | |
644 | r = self->codec->decode(&state, self->codec->config,\r | |
645 | &buf.inbuf, inleft, &buf.outbuf, outleft);\r | |
646 | if (r == 0)\r | |
647 | break;\r | |
648 | else if (multibytecodec_decerror(self->codec, &state,\r | |
649 | &buf, errorcb, r))\r | |
650 | goto errorexit;\r | |
651 | }\r | |
652 | \r | |
653 | finalsize = (Py_ssize_t)(buf.outbuf -\r | |
654 | PyUnicode_AS_UNICODE(buf.outobj));\r | |
655 | \r | |
656 | if (finalsize != PyUnicode_GET_SIZE(buf.outobj))\r | |
657 | if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)\r | |
658 | goto errorexit;\r | |
659 | \r | |
660 | PyBuffer_Release(&pdata);\r | |
661 | Py_XDECREF(buf.excobj);\r | |
662 | ERROR_DECREF(errorcb);\r | |
663 | return make_tuple(buf.outobj, datalen);\r | |
664 | \r | |
665 | errorexit:\r | |
666 | PyBuffer_Release(&pdata);\r | |
667 | ERROR_DECREF(errorcb);\r | |
668 | Py_XDECREF(buf.excobj);\r | |
669 | Py_XDECREF(buf.outobj);\r | |
670 | \r | |
671 | return NULL;\r | |
672 | }\r | |
673 | \r | |
674 | static struct PyMethodDef multibytecodec_methods[] = {\r | |
675 | {"encode", (PyCFunction)MultibyteCodec_Encode,\r | |
676 | METH_VARARGS | METH_KEYWORDS,\r | |
677 | MultibyteCodec_Encode__doc__},\r | |
678 | {"decode", (PyCFunction)MultibyteCodec_Decode,\r | |
679 | METH_VARARGS | METH_KEYWORDS,\r | |
680 | MultibyteCodec_Decode__doc__},\r | |
681 | {NULL, NULL},\r | |
682 | };\r | |
683 | \r | |
684 | static void\r | |
685 | multibytecodec_dealloc(MultibyteCodecObject *self)\r | |
686 | {\r | |
687 | PyObject_Del(self);\r | |
688 | }\r | |
689 | \r | |
690 | static PyTypeObject MultibyteCodec_Type = {\r | |
691 | PyVarObject_HEAD_INIT(NULL, 0)\r | |
692 | "MultibyteCodec", /* tp_name */\r | |
693 | sizeof(MultibyteCodecObject), /* tp_basicsize */\r | |
694 | 0, /* tp_itemsize */\r | |
695 | /* methods */\r | |
696 | (destructor)multibytecodec_dealloc, /* tp_dealloc */\r | |
697 | 0, /* tp_print */\r | |
698 | 0, /* tp_getattr */\r | |
699 | 0, /* tp_setattr */\r | |
700 | 0, /* tp_compare */\r | |
701 | 0, /* tp_repr */\r | |
702 | 0, /* tp_as_number */\r | |
703 | 0, /* tp_as_sequence */\r | |
704 | 0, /* tp_as_mapping */\r | |
705 | 0, /* tp_hash */\r | |
706 | 0, /* tp_call */\r | |
707 | 0, /* tp_str */\r | |
708 | PyObject_GenericGetAttr, /* tp_getattro */\r | |
709 | 0, /* tp_setattro */\r | |
710 | 0, /* tp_as_buffer */\r | |
711 | Py_TPFLAGS_DEFAULT, /* tp_flags */\r | |
712 | 0, /* tp_doc */\r | |
713 | 0, /* tp_traverse */\r | |
714 | 0, /* tp_clear */\r | |
715 | 0, /* tp_richcompare */\r | |
716 | 0, /* tp_weaklistoffset */\r | |
717 | 0, /* tp_iter */\r | |
718 | 0, /* tp_iterext */\r | |
719 | multibytecodec_methods, /* tp_methods */\r | |
720 | };\r | |
721 | \r | |
722 | \r | |
723 | /**\r | |
724 | * Utility functions for stateful codec mechanism\r | |
725 | */\r | |
726 | \r | |
/* View any stateful codec object as the shared decoder / encoder
 * context structure. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
729 | \r | |
730 | static PyObject *\r | |
731 | encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,\r | |
732 | PyObject *unistr, int final)\r | |
733 | {\r | |
734 | PyObject *ucvt, *r = NULL;\r | |
735 | Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;\r | |
736 | Py_ssize_t datalen, origpending;\r | |
737 | \r | |
738 | if (PyUnicode_Check(unistr))\r | |
739 | ucvt = NULL;\r | |
740 | else {\r | |
741 | unistr = ucvt = PyObject_Unicode(unistr);\r | |
742 | if (unistr == NULL)\r | |
743 | return NULL;\r | |
744 | else if (!PyUnicode_Check(unistr)) {\r | |
745 | PyErr_SetString(PyExc_TypeError,\r | |
746 | "couldn't convert the object to unicode.");\r | |
747 | Py_DECREF(ucvt);\r | |
748 | return NULL;\r | |
749 | }\r | |
750 | }\r | |
751 | \r | |
752 | datalen = PyUnicode_GET_SIZE(unistr);\r | |
753 | origpending = ctx->pendingsize;\r | |
754 | \r | |
755 | if (origpending > 0) {\r | |
756 | if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) {\r | |
757 | PyErr_NoMemory();\r | |
758 | /* inbuf_tmp == NULL */\r | |
759 | goto errorexit;\r | |
760 | }\r | |
761 | inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);\r | |
762 | if (inbuf_tmp == NULL)\r | |
763 | goto errorexit;\r | |
764 | memcpy(inbuf_tmp, ctx->pending,\r | |
765 | Py_UNICODE_SIZE * ctx->pendingsize);\r | |
766 | memcpy(inbuf_tmp + ctx->pendingsize,\r | |
767 | PyUnicode_AS_UNICODE(unistr),\r | |
768 | Py_UNICODE_SIZE * datalen);\r | |
769 | datalen += ctx->pendingsize;\r | |
770 | ctx->pendingsize = 0;\r | |
771 | inbuf = inbuf_tmp;\r | |
772 | }\r | |
773 | else\r | |
774 | inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);\r | |
775 | \r | |
776 | inbuf_end = inbuf + datalen;\r | |
777 | \r | |
778 | r = multibytecodec_encode(ctx->codec, &ctx->state,\r | |
779 | (const Py_UNICODE **)&inbuf, datalen,\r | |
780 | ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);\r | |
781 | if (r == NULL) {\r | |
782 | /* recover the original pending buffer */\r | |
783 | if (origpending > 0)\r | |
784 | memcpy(ctx->pending, inbuf_tmp,\r | |
785 | Py_UNICODE_SIZE * origpending);\r | |
786 | ctx->pendingsize = origpending;\r | |
787 | goto errorexit;\r | |
788 | }\r | |
789 | \r | |
790 | if (inbuf < inbuf_end) {\r | |
791 | ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);\r | |
792 | if (ctx->pendingsize > MAXENCPENDING) {\r | |
793 | /* normal codecs can't reach here */\r | |
794 | ctx->pendingsize = 0;\r | |
795 | PyErr_SetString(PyExc_UnicodeError,\r | |
796 | "pending buffer overflow");\r | |
797 | goto errorexit;\r | |
798 | }\r | |
799 | memcpy(ctx->pending, inbuf,\r | |
800 | ctx->pendingsize * Py_UNICODE_SIZE);\r | |
801 | }\r | |
802 | \r | |
803 | if (inbuf_tmp != NULL)\r | |
804 | PyMem_Del(inbuf_tmp);\r | |
805 | Py_XDECREF(ucvt);\r | |
806 | return r;\r | |
807 | \r | |
808 | errorexit:\r | |
809 | if (inbuf_tmp != NULL)\r | |
810 | PyMem_Del(inbuf_tmp);\r | |
811 | Py_XDECREF(r);\r | |
812 | Py_XDECREF(ucvt);\r | |
813 | return NULL;\r | |
814 | }\r | |
815 | \r | |
816 | static int\r | |
817 | decoder_append_pending(MultibyteStatefulDecoderContext *ctx,\r | |
818 | MultibyteDecodeBuffer *buf)\r | |
819 | {\r | |
820 | Py_ssize_t npendings;\r | |
821 | \r | |
822 | npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);\r | |
823 | if (npendings + ctx->pendingsize > MAXDECPENDING ||\r | |
824 | npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {\r | |
825 | PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");\r | |
826 | return -1;\r | |
827 | }\r | |
828 | memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);\r | |
829 | ctx->pendingsize += npendings;\r | |
830 | return 0;\r | |
831 | }\r | |
832 | \r | |
833 | static int\r | |
834 | decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,\r | |
835 | Py_ssize_t size)\r | |
836 | {\r | |
837 | buf->inbuf = buf->inbuf_top = (const unsigned char *)data;\r | |
838 | buf->inbuf_end = buf->inbuf_top + size;\r | |
839 | if (buf->outobj == NULL) { /* only if outobj is not allocated yet */\r | |
840 | buf->outobj = PyUnicode_FromUnicode(NULL, size);\r | |
841 | if (buf->outobj == NULL)\r | |
842 | return -1;\r | |
843 | buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);\r | |
844 | buf->outbuf_end = buf->outbuf +\r | |
845 | PyUnicode_GET_SIZE(buf->outobj);\r | |
846 | }\r | |
847 | \r | |
848 | return 0;\r | |
849 | }\r | |
850 | \r | |
851 | static int\r | |
852 | decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,\r | |
853 | MultibyteDecodeBuffer *buf)\r | |
854 | {\r | |
855 | while (buf->inbuf < buf->inbuf_end) {\r | |
856 | Py_ssize_t inleft, outleft;\r | |
857 | Py_ssize_t r;\r | |
858 | \r | |
859 | inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);\r | |
860 | outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);\r | |
861 | \r | |
862 | r = ctx->codec->decode(&ctx->state, ctx->codec->config,\r | |
863 | &buf->inbuf, inleft, &buf->outbuf, outleft);\r | |
864 | if (r == 0 || r == MBERR_TOOFEW)\r | |
865 | break;\r | |
866 | else if (multibytecodec_decerror(ctx->codec, &ctx->state,\r | |
867 | buf, ctx->errors, r))\r | |
868 | return -1;\r | |
869 | }\r | |
870 | return 0;\r | |
871 | }\r | |
872 | \r | |
873 | \r | |
874 | /**\r | |
875 | * MultibyteIncrementalEncoder object\r | |
876 | */\r | |
877 | \r | |
878 | static PyObject *\r | |
879 | mbiencoder_encode(MultibyteIncrementalEncoderObject *self,\r | |
880 | PyObject *args, PyObject *kwargs)\r | |
881 | {\r | |
882 | PyObject *data;\r | |
883 | int final = 0;\r | |
884 | \r | |
885 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",\r | |
886 | incrementalkwarglist, &data, &final))\r | |
887 | return NULL;\r | |
888 | \r | |
889 | return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);\r | |
890 | }\r | |
891 | \r | |
892 | static PyObject *\r | |
893 | mbiencoder_reset(MultibyteIncrementalEncoderObject *self)\r | |
894 | {\r | |
895 | if (self->codec->decreset != NULL &&\r | |
896 | self->codec->decreset(&self->state, self->codec->config) != 0)\r | |
897 | return NULL;\r | |
898 | self->pendingsize = 0;\r | |
899 | \r | |
900 | Py_RETURN_NONE;\r | |
901 | }\r | |
902 | \r | |
/* Method table of MultibyteIncrementalEncoder. */
static struct PyMethodDef mbiencoder_methods[] = {
    /* encode accepts positional and keyword arguments */
    {"encode",          (PyCFunction)mbiencoder_encode,
                    METH_VARARGS | METH_KEYWORDS, NULL},
    /* reset takes no arguments */
    {"reset",           (PyCFunction)mbiencoder_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
910 | \r | |
911 | static PyObject *\r | |
912 | mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
913 | {\r | |
914 | MultibyteIncrementalEncoderObject *self;\r | |
915 | PyObject *codec = NULL;\r | |
916 | char *errors = NULL;\r | |
917 | \r | |
918 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",\r | |
919 | incnewkwarglist, &errors))\r | |
920 | return NULL;\r | |
921 | \r | |
922 | self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);\r | |
923 | if (self == NULL)\r | |
924 | return NULL;\r | |
925 | \r | |
926 | codec = PyObject_GetAttrString((PyObject *)type, "codec");\r | |
927 | if (codec == NULL)\r | |
928 | goto errorexit;\r | |
929 | if (!MultibyteCodec_Check(codec)) {\r | |
930 | PyErr_SetString(PyExc_TypeError, "codec is unexpected type");\r | |
931 | goto errorexit;\r | |
932 | }\r | |
933 | \r | |
934 | self->codec = ((MultibyteCodecObject *)codec)->codec;\r | |
935 | self->pendingsize = 0;\r | |
936 | self->errors = internal_error_callback(errors);\r | |
937 | if (self->errors == NULL)\r | |
938 | goto errorexit;\r | |
939 | if (self->codec->encinit != NULL &&\r | |
940 | self->codec->encinit(&self->state, self->codec->config) != 0)\r | |
941 | goto errorexit;\r | |
942 | \r | |
943 | Py_DECREF(codec);\r | |
944 | return (PyObject *)self;\r | |
945 | \r | |
946 | errorexit:\r | |
947 | Py_XDECREF(self);\r | |
948 | Py_XDECREF(codec);\r | |
949 | return NULL;\r | |
950 | }\r | |
951 | \r | |
/*
 * tp_init for MultibyteIncrementalEncoder: intentionally a no-op that
 * always reports success.  All set-up happens in mbiencoder_new().
 */
static int
mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
957 | \r | |
958 | static int\r | |
959 | mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,\r | |
960 | visitproc visit, void *arg)\r | |
961 | {\r | |
962 | if (ERROR_ISCUSTOM(self->errors))\r | |
963 | Py_VISIT(self->errors);\r | |
964 | return 0;\r | |
965 | }\r | |
966 | \r | |
/*
 * tp_dealloc for MultibyteIncrementalEncoder: stop GC tracking, release
 * the error handler via ERROR_DECREF, then free the object's memory
 * through the type's tp_free slot.
 */
static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    Py_TYPE(self)->tp_free(self);
}
974 | \r | |
/*
 * Type object for MultibyteIncrementalEncoder.  The type takes part in
 * garbage collection and may be subclassed (see tp_flags); slots left
 * at 0 are not provided here.
 */
static PyTypeObject MultibyteIncrementalEncoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteIncrementalEncoder",      /* tp_name */
    sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbiencoder_dealloc,     /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbiencoder_traverse,  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbiencoder_methods,                 /* tp_methods */
    0,                                  /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbiencoder_init,                    /* tp_init */
    0,                                  /* tp_alloc */
    mbiencoder_new,                     /* tp_new */
};
1017 | \r | |
1018 | \r | |
1019 | /**\r | |
1020 | * MultibyteIncrementalDecoder object\r | |
1021 | */\r | |
1022 | \r | |
1023 | static PyObject *\r | |
1024 | mbidecoder_decode(MultibyteIncrementalDecoderObject *self,\r | |
1025 | PyObject *args, PyObject *kwargs)\r | |
1026 | {\r | |
1027 | MultibyteDecodeBuffer buf;\r | |
1028 | char *data, *wdata = NULL;\r | |
1029 | Py_buffer pdata;\r | |
1030 | Py_ssize_t wsize, finalsize = 0, size, origpending;\r | |
1031 | int final = 0;\r | |
1032 | \r | |
1033 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode",\r | |
1034 | incrementalkwarglist, &pdata, &final))\r | |
1035 | return NULL;\r | |
1036 | data = pdata.buf;\r | |
1037 | size = pdata.len;\r | |
1038 | \r | |
1039 | buf.outobj = buf.excobj = NULL;\r | |
1040 | origpending = self->pendingsize;\r | |
1041 | \r | |
1042 | if (self->pendingsize == 0) {\r | |
1043 | wsize = size;\r | |
1044 | wdata = data;\r | |
1045 | }\r | |
1046 | else {\r | |
1047 | if (size > PY_SSIZE_T_MAX - self->pendingsize) {\r | |
1048 | PyErr_NoMemory();\r | |
1049 | goto errorexit;\r | |
1050 | }\r | |
1051 | wsize = size + self->pendingsize;\r | |
1052 | wdata = PyMem_Malloc(wsize);\r | |
1053 | if (wdata == NULL)\r | |
1054 | goto errorexit;\r | |
1055 | memcpy(wdata, self->pending, self->pendingsize);\r | |
1056 | memcpy(wdata + self->pendingsize, data, size);\r | |
1057 | self->pendingsize = 0;\r | |
1058 | }\r | |
1059 | \r | |
1060 | if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)\r | |
1061 | goto errorexit;\r | |
1062 | \r | |
1063 | if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))\r | |
1064 | goto errorexit;\r | |
1065 | \r | |
1066 | if (final && buf.inbuf < buf.inbuf_end) {\r | |
1067 | if (multibytecodec_decerror(self->codec, &self->state,\r | |
1068 | &buf, self->errors, MBERR_TOOFEW)) {\r | |
1069 | /* recover the original pending buffer */\r | |
1070 | memcpy(self->pending, wdata, origpending);\r | |
1071 | self->pendingsize = origpending;\r | |
1072 | goto errorexit;\r | |
1073 | }\r | |
1074 | }\r | |
1075 | \r | |
1076 | if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */\r | |
1077 | if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)\r | |
1078 | goto errorexit;\r | |
1079 | }\r | |
1080 | \r | |
1081 | finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));\r | |
1082 | if (finalsize != PyUnicode_GET_SIZE(buf.outobj))\r | |
1083 | if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)\r | |
1084 | goto errorexit;\r | |
1085 | \r | |
1086 | PyBuffer_Release(&pdata);\r | |
1087 | if (wdata != data)\r | |
1088 | PyMem_Del(wdata);\r | |
1089 | Py_XDECREF(buf.excobj);\r | |
1090 | return buf.outobj;\r | |
1091 | \r | |
1092 | errorexit:\r | |
1093 | PyBuffer_Release(&pdata);\r | |
1094 | if (wdata != NULL && wdata != data)\r | |
1095 | PyMem_Del(wdata);\r | |
1096 | Py_XDECREF(buf.excobj);\r | |
1097 | Py_XDECREF(buf.outobj);\r | |
1098 | return NULL;\r | |
1099 | }\r | |
1100 | \r | |
1101 | static PyObject *\r | |
1102 | mbidecoder_reset(MultibyteIncrementalDecoderObject *self)\r | |
1103 | {\r | |
1104 | if (self->codec->decreset != NULL &&\r | |
1105 | self->codec->decreset(&self->state, self->codec->config) != 0)\r | |
1106 | return NULL;\r | |
1107 | self->pendingsize = 0;\r | |
1108 | \r | |
1109 | Py_RETURN_NONE;\r | |
1110 | }\r | |
1111 | \r | |
/* Method table of MultibyteIncrementalDecoder. */
static struct PyMethodDef mbidecoder_methods[] = {
    /* decode accepts positional and keyword arguments */
    {"decode",          (PyCFunction)mbidecoder_decode,
                    METH_VARARGS | METH_KEYWORDS, NULL},
    /* reset takes no arguments */
    {"reset",           (PyCFunction)mbidecoder_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
1119 | \r | |
1120 | static PyObject *\r | |
1121 | mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
1122 | {\r | |
1123 | MultibyteIncrementalDecoderObject *self;\r | |
1124 | PyObject *codec = NULL;\r | |
1125 | char *errors = NULL;\r | |
1126 | \r | |
1127 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",\r | |
1128 | incnewkwarglist, &errors))\r | |
1129 | return NULL;\r | |
1130 | \r | |
1131 | self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);\r | |
1132 | if (self == NULL)\r | |
1133 | return NULL;\r | |
1134 | \r | |
1135 | codec = PyObject_GetAttrString((PyObject *)type, "codec");\r | |
1136 | if (codec == NULL)\r | |
1137 | goto errorexit;\r | |
1138 | if (!MultibyteCodec_Check(codec)) {\r | |
1139 | PyErr_SetString(PyExc_TypeError, "codec is unexpected type");\r | |
1140 | goto errorexit;\r | |
1141 | }\r | |
1142 | \r | |
1143 | self->codec = ((MultibyteCodecObject *)codec)->codec;\r | |
1144 | self->pendingsize = 0;\r | |
1145 | self->errors = internal_error_callback(errors);\r | |
1146 | if (self->errors == NULL)\r | |
1147 | goto errorexit;\r | |
1148 | if (self->codec->decinit != NULL &&\r | |
1149 | self->codec->decinit(&self->state, self->codec->config) != 0)\r | |
1150 | goto errorexit;\r | |
1151 | \r | |
1152 | Py_DECREF(codec);\r | |
1153 | return (PyObject *)self;\r | |
1154 | \r | |
1155 | errorexit:\r | |
1156 | Py_XDECREF(self);\r | |
1157 | Py_XDECREF(codec);\r | |
1158 | return NULL;\r | |
1159 | }\r | |
1160 | \r | |
/*
 * tp_init for MultibyteIncrementalDecoder: intentionally a no-op that
 * always reports success.  All set-up happens in mbidecoder_new().
 */
static int
mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1166 | \r | |
1167 | static int\r | |
1168 | mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,\r | |
1169 | visitproc visit, void *arg)\r | |
1170 | {\r | |
1171 | if (ERROR_ISCUSTOM(self->errors))\r | |
1172 | Py_VISIT(self->errors);\r | |
1173 | return 0;\r | |
1174 | }\r | |
1175 | \r | |
/*
 * tp_dealloc for MultibyteIncrementalDecoder: stop GC tracking, release
 * the error handler via ERROR_DECREF, then free the object's memory
 * through the type's tp_free slot.
 */
static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    Py_TYPE(self)->tp_free(self);
}
1183 | \r | |
/*
 * Type object for MultibyteIncrementalDecoder.  The type takes part in
 * garbage collection and may be subclassed (see tp_flags); slots left
 * at 0 are not provided here.
 */
static PyTypeObject MultibyteIncrementalDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteIncrementalDecoder",      /* tp_name */
    sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbidecoder_dealloc,     /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbidecoder_traverse,  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbidecoder_methods,                 /* tp_methods */
    0,                                  /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbidecoder_init,                    /* tp_init */
    0,                                  /* tp_alloc */
    mbidecoder_new,                     /* tp_new */
};
1226 | \r | |
1227 | \r | |
1228 | /**\r | |
1229 | * MultibyteStreamReader object\r | |
1230 | */\r | |
1231 | \r | |
1232 | static PyObject *\r | |
1233 | mbstreamreader_iread(MultibyteStreamReaderObject *self,\r | |
1234 | const char *method, Py_ssize_t sizehint)\r | |
1235 | {\r | |
1236 | MultibyteDecodeBuffer buf;\r | |
1237 | PyObject *cres;\r | |
1238 | Py_ssize_t rsize, finalsize = 0;\r | |
1239 | \r | |
1240 | if (sizehint == 0)\r | |
1241 | return PyUnicode_FromUnicode(NULL, 0);\r | |
1242 | \r | |
1243 | buf.outobj = buf.excobj = NULL;\r | |
1244 | cres = NULL;\r | |
1245 | \r | |
1246 | for (;;) {\r | |
1247 | int endoffile;\r | |
1248 | \r | |
1249 | if (sizehint < 0)\r | |
1250 | cres = PyObject_CallMethod(self->stream,\r | |
1251 | (char *)method, NULL);\r | |
1252 | else\r | |
1253 | cres = PyObject_CallMethod(self->stream,\r | |
1254 | (char *)method, "i", sizehint);\r | |
1255 | if (cres == NULL)\r | |
1256 | goto errorexit;\r | |
1257 | \r | |
1258 | if (!PyString_Check(cres)) {\r | |
1259 | PyErr_SetString(PyExc_TypeError,\r | |
1260 | "stream function returned a "\r | |
1261 | "non-string object");\r | |
1262 | goto errorexit;\r | |
1263 | }\r | |
1264 | \r | |
1265 | endoffile = (PyString_GET_SIZE(cres) == 0);\r | |
1266 | \r | |
1267 | if (self->pendingsize > 0) {\r | |
1268 | PyObject *ctr;\r | |
1269 | char *ctrdata;\r | |
1270 | \r | |
1271 | if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {\r | |
1272 | PyErr_NoMemory();\r | |
1273 | goto errorexit;\r | |
1274 | }\r | |
1275 | rsize = PyString_GET_SIZE(cres) + self->pendingsize;\r | |
1276 | ctr = PyString_FromStringAndSize(NULL, rsize);\r | |
1277 | if (ctr == NULL)\r | |
1278 | goto errorexit;\r | |
1279 | ctrdata = PyString_AS_STRING(ctr);\r | |
1280 | memcpy(ctrdata, self->pending, self->pendingsize);\r | |
1281 | memcpy(ctrdata + self->pendingsize,\r | |
1282 | PyString_AS_STRING(cres),\r | |
1283 | PyString_GET_SIZE(cres));\r | |
1284 | Py_DECREF(cres);\r | |
1285 | cres = ctr;\r | |
1286 | self->pendingsize = 0;\r | |
1287 | }\r | |
1288 | \r | |
1289 | rsize = PyString_GET_SIZE(cres);\r | |
1290 | if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres),\r | |
1291 | rsize) != 0)\r | |
1292 | goto errorexit;\r | |
1293 | \r | |
1294 | if (rsize > 0 && decoder_feed_buffer(\r | |
1295 | (MultibyteStatefulDecoderContext *)self, &buf))\r | |
1296 | goto errorexit;\r | |
1297 | \r | |
1298 | if (endoffile || sizehint < 0) {\r | |
1299 | if (buf.inbuf < buf.inbuf_end &&\r | |
1300 | multibytecodec_decerror(self->codec, &self->state,\r | |
1301 | &buf, self->errors, MBERR_TOOFEW))\r | |
1302 | goto errorexit;\r | |
1303 | }\r | |
1304 | \r | |
1305 | if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */\r | |
1306 | if (decoder_append_pending(STATEFUL_DCTX(self),\r | |
1307 | &buf) != 0)\r | |
1308 | goto errorexit;\r | |
1309 | }\r | |
1310 | \r | |
1311 | finalsize = (Py_ssize_t)(buf.outbuf -\r | |
1312 | PyUnicode_AS_UNICODE(buf.outobj));\r | |
1313 | Py_DECREF(cres);\r | |
1314 | cres = NULL;\r | |
1315 | \r | |
1316 | if (sizehint < 0 || finalsize != 0 || rsize == 0)\r | |
1317 | break;\r | |
1318 | \r | |
1319 | sizehint = 1; /* read 1 more byte and retry */\r | |
1320 | }\r | |
1321 | \r | |
1322 | if (finalsize != PyUnicode_GET_SIZE(buf.outobj))\r | |
1323 | if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)\r | |
1324 | goto errorexit;\r | |
1325 | \r | |
1326 | Py_XDECREF(cres);\r | |
1327 | Py_XDECREF(buf.excobj);\r | |
1328 | return buf.outobj;\r | |
1329 | \r | |
1330 | errorexit:\r | |
1331 | Py_XDECREF(cres);\r | |
1332 | Py_XDECREF(buf.excobj);\r | |
1333 | Py_XDECREF(buf.outobj);\r | |
1334 | return NULL;\r | |
1335 | }\r | |
1336 | \r | |
1337 | static PyObject *\r | |
1338 | mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)\r | |
1339 | {\r | |
1340 | PyObject *sizeobj = NULL;\r | |
1341 | Py_ssize_t size;\r | |
1342 | \r | |
1343 | if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj))\r | |
1344 | return NULL;\r | |
1345 | \r | |
1346 | if (sizeobj == Py_None || sizeobj == NULL)\r | |
1347 | size = -1;\r | |
1348 | else if (PyInt_Check(sizeobj))\r | |
1349 | size = PyInt_AsSsize_t(sizeobj);\r | |
1350 | else {\r | |
1351 | PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");\r | |
1352 | return NULL;\r | |
1353 | }\r | |
1354 | \r | |
1355 | return mbstreamreader_iread(self, "read", size);\r | |
1356 | }\r | |
1357 | \r | |
1358 | static PyObject *\r | |
1359 | mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)\r | |
1360 | {\r | |
1361 | PyObject *sizeobj = NULL;\r | |
1362 | Py_ssize_t size;\r | |
1363 | \r | |
1364 | if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj))\r | |
1365 | return NULL;\r | |
1366 | \r | |
1367 | if (sizeobj == Py_None || sizeobj == NULL)\r | |
1368 | size = -1;\r | |
1369 | else if (PyInt_Check(sizeobj))\r | |
1370 | size = PyInt_AsSsize_t(sizeobj);\r | |
1371 | else {\r | |
1372 | PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");\r | |
1373 | return NULL;\r | |
1374 | }\r | |
1375 | \r | |
1376 | return mbstreamreader_iread(self, "readline", size);\r | |
1377 | }\r | |
1378 | \r | |
1379 | static PyObject *\r | |
1380 | mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)\r | |
1381 | {\r | |
1382 | PyObject *sizehintobj = NULL, *r, *sr;\r | |
1383 | Py_ssize_t sizehint;\r | |
1384 | \r | |
1385 | if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj))\r | |
1386 | return NULL;\r | |
1387 | \r | |
1388 | if (sizehintobj == Py_None || sizehintobj == NULL)\r | |
1389 | sizehint = -1;\r | |
1390 | else if (PyInt_Check(sizehintobj))\r | |
1391 | sizehint = PyInt_AsSsize_t(sizehintobj);\r | |
1392 | else {\r | |
1393 | PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");\r | |
1394 | return NULL;\r | |
1395 | }\r | |
1396 | \r | |
1397 | r = mbstreamreader_iread(self, "read", sizehint);\r | |
1398 | if (r == NULL)\r | |
1399 | return NULL;\r | |
1400 | \r | |
1401 | sr = PyUnicode_Splitlines(r, 1);\r | |
1402 | Py_DECREF(r);\r | |
1403 | return sr;\r | |
1404 | }\r | |
1405 | \r | |
1406 | static PyObject *\r | |
1407 | mbstreamreader_reset(MultibyteStreamReaderObject *self)\r | |
1408 | {\r | |
1409 | if (self->codec->decreset != NULL &&\r | |
1410 | self->codec->decreset(&self->state, self->codec->config) != 0)\r | |
1411 | return NULL;\r | |
1412 | self->pendingsize = 0;\r | |
1413 | \r | |
1414 | Py_RETURN_NONE;\r | |
1415 | }\r | |
1416 | \r | |
/* Method table of MultibyteStreamReader. */
static struct PyMethodDef mbstreamreader_methods[] = {
    /* the three read variants take an optional positional argument */
    {"read",            (PyCFunction)mbstreamreader_read,
                    METH_VARARGS, NULL},
    {"readline",        (PyCFunction)mbstreamreader_readline,
                    METH_VARARGS, NULL},
    {"readlines",       (PyCFunction)mbstreamreader_readlines,
                    METH_VARARGS, NULL},
    /* reset takes no arguments */
    {"reset",           (PyCFunction)mbstreamreader_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
1428 | \r | |
/* Member table of MultibyteStreamReader: exposes the wrapped stream
 * object as the read-only attribute "stream". */
static PyMemberDef mbstreamreader_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamReaderObject, stream),
                    READONLY, NULL},
    {NULL,}     /* sentinel */
};
1435 | \r | |
1436 | static PyObject *\r | |
1437 | mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
1438 | {\r | |
1439 | MultibyteStreamReaderObject *self;\r | |
1440 | PyObject *stream, *codec = NULL;\r | |
1441 | char *errors = NULL;\r | |
1442 | \r | |
1443 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",\r | |
1444 | streamkwarglist, &stream, &errors))\r | |
1445 | return NULL;\r | |
1446 | \r | |
1447 | self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);\r | |
1448 | if (self == NULL)\r | |
1449 | return NULL;\r | |
1450 | \r | |
1451 | codec = PyObject_GetAttrString((PyObject *)type, "codec");\r | |
1452 | if (codec == NULL)\r | |
1453 | goto errorexit;\r | |
1454 | if (!MultibyteCodec_Check(codec)) {\r | |
1455 | PyErr_SetString(PyExc_TypeError, "codec is unexpected type");\r | |
1456 | goto errorexit;\r | |
1457 | }\r | |
1458 | \r | |
1459 | self->codec = ((MultibyteCodecObject *)codec)->codec;\r | |
1460 | self->stream = stream;\r | |
1461 | Py_INCREF(stream);\r | |
1462 | self->pendingsize = 0;\r | |
1463 | self->errors = internal_error_callback(errors);\r | |
1464 | if (self->errors == NULL)\r | |
1465 | goto errorexit;\r | |
1466 | if (self->codec->decinit != NULL &&\r | |
1467 | self->codec->decinit(&self->state, self->codec->config) != 0)\r | |
1468 | goto errorexit;\r | |
1469 | \r | |
1470 | Py_DECREF(codec);\r | |
1471 | return (PyObject *)self;\r | |
1472 | \r | |
1473 | errorexit:\r | |
1474 | Py_XDECREF(self);\r | |
1475 | Py_XDECREF(codec);\r | |
1476 | return NULL;\r | |
1477 | }\r | |
1478 | \r | |
/*
 * tp_init for MultibyteStreamReader: intentionally a no-op that always
 * reports success.  All set-up happens in mbstreamreader_new().
 */
static int
mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1484 | \r | |
1485 | static int\r | |
1486 | mbstreamreader_traverse(MultibyteStreamReaderObject *self,\r | |
1487 | visitproc visit, void *arg)\r | |
1488 | {\r | |
1489 | if (ERROR_ISCUSTOM(self->errors))\r | |
1490 | Py_VISIT(self->errors);\r | |
1491 | Py_VISIT(self->stream);\r | |
1492 | return 0;\r | |
1493 | }\r | |
1494 | \r | |
/*
 * tp_dealloc for MultibyteStreamReader: stop GC tracking, release the
 * error handler via ERROR_DECREF and the wrapped stream, then free the
 * object's memory through the type's tp_free slot.
 */
static void
mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    /* XDECREF: stream is still NULL if construction failed early. */
    Py_XDECREF(self->stream);
    Py_TYPE(self)->tp_free(self);
}
1503 | \r | |
/*
 * Type object for MultibyteStreamReader.  The type takes part in
 * garbage collection and may be subclassed (see tp_flags); slots left
 * at 0 are not provided here.
 */
static PyTypeObject MultibyteStreamReader_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteStreamReader",            /* tp_name */
    sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbstreamreader_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbstreamreader_traverse,      /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbstreamreader_methods,             /* tp_methods */
    mbstreamreader_members,             /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbstreamreader_init,                /* tp_init */
    0,                                  /* tp_alloc */
    mbstreamreader_new,                 /* tp_new */
};
1546 | \r | |
1547 | \r | |
1548 | /**\r | |
1549 | * MultibyteStreamWriter object\r | |
1550 | */\r | |
1551 | \r | |
1552 | static int\r | |
1553 | mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,\r | |
1554 | PyObject *unistr)\r | |
1555 | {\r | |
1556 | PyObject *str, *wr;\r | |
1557 | \r | |
1558 | str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);\r | |
1559 | if (str == NULL)\r | |
1560 | return -1;\r | |
1561 | \r | |
1562 | wr = PyObject_CallMethod(self->stream, "write", "O", str);\r | |
1563 | Py_DECREF(str);\r | |
1564 | if (wr == NULL)\r | |
1565 | return -1;\r | |
1566 | \r | |
1567 | Py_DECREF(wr);\r | |
1568 | return 0;\r | |
1569 | }\r | |
1570 | \r | |
1571 | static PyObject *\r | |
1572 | mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj)\r | |
1573 | {\r | |
1574 | if (mbstreamwriter_iwrite(self, strobj))\r | |
1575 | return NULL;\r | |
1576 | else\r | |
1577 | Py_RETURN_NONE;\r | |
1578 | }\r | |
1579 | \r | |
1580 | static PyObject *\r | |
1581 | mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines)\r | |
1582 | {\r | |
1583 | PyObject *strobj;\r | |
1584 | int i, r;\r | |
1585 | \r | |
1586 | if (!PySequence_Check(lines)) {\r | |
1587 | PyErr_SetString(PyExc_TypeError,\r | |
1588 | "arg must be a sequence object");\r | |
1589 | return NULL;\r | |
1590 | }\r | |
1591 | \r | |
1592 | for (i = 0; i < PySequence_Length(lines); i++) {\r | |
1593 | /* length can be changed even within this loop */\r | |
1594 | strobj = PySequence_GetItem(lines, i);\r | |
1595 | if (strobj == NULL)\r | |
1596 | return NULL;\r | |
1597 | \r | |
1598 | r = mbstreamwriter_iwrite(self, strobj);\r | |
1599 | Py_DECREF(strobj);\r | |
1600 | if (r == -1)\r | |
1601 | return NULL;\r | |
1602 | }\r | |
1603 | \r | |
1604 | Py_RETURN_NONE;\r | |
1605 | }\r | |
1606 | \r | |
1607 | static PyObject *\r | |
1608 | mbstreamwriter_reset(MultibyteStreamWriterObject *self)\r | |
1609 | {\r | |
1610 | const Py_UNICODE *pending;\r | |
1611 | PyObject *pwrt;\r | |
1612 | \r | |
1613 | pending = self->pending;\r | |
1614 | pwrt = multibytecodec_encode(self->codec, &self->state,\r | |
1615 | &pending, self->pendingsize, self->errors,\r | |
1616 | MBENC_FLUSH | MBENC_RESET);\r | |
1617 | /* some pending buffer can be truncated when UnicodeEncodeError is\r | |
1618 | * raised on 'strict' mode. but, 'reset' method is designed to\r | |
1619 | * reset the pending buffer or states so failed string sequence\r | |
1620 | * ought to be missed */\r | |
1621 | self->pendingsize = 0;\r | |
1622 | if (pwrt == NULL)\r | |
1623 | return NULL;\r | |
1624 | \r | |
1625 | if (PyString_Size(pwrt) > 0) {\r | |
1626 | PyObject *wr;\r | |
1627 | wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);\r | |
1628 | if (wr == NULL) {\r | |
1629 | Py_DECREF(pwrt);\r | |
1630 | return NULL;\r | |
1631 | }\r | |
1632 | }\r | |
1633 | Py_DECREF(pwrt);\r | |
1634 | \r | |
1635 | Py_RETURN_NONE;\r | |
1636 | }\r | |
1637 | \r | |
1638 | static PyObject *\r | |
1639 | mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
1640 | {\r | |
1641 | MultibyteStreamWriterObject *self;\r | |
1642 | PyObject *stream, *codec = NULL;\r | |
1643 | char *errors = NULL;\r | |
1644 | \r | |
1645 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",\r | |
1646 | streamkwarglist, &stream, &errors))\r | |
1647 | return NULL;\r | |
1648 | \r | |
1649 | self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);\r | |
1650 | if (self == NULL)\r | |
1651 | return NULL;\r | |
1652 | \r | |
1653 | codec = PyObject_GetAttrString((PyObject *)type, "codec");\r | |
1654 | if (codec == NULL)\r | |
1655 | goto errorexit;\r | |
1656 | if (!MultibyteCodec_Check(codec)) {\r | |
1657 | PyErr_SetString(PyExc_TypeError, "codec is unexpected type");\r | |
1658 | goto errorexit;\r | |
1659 | }\r | |
1660 | \r | |
1661 | self->codec = ((MultibyteCodecObject *)codec)->codec;\r | |
1662 | self->stream = stream;\r | |
1663 | Py_INCREF(stream);\r | |
1664 | self->pendingsize = 0;\r | |
1665 | self->errors = internal_error_callback(errors);\r | |
1666 | if (self->errors == NULL)\r | |
1667 | goto errorexit;\r | |
1668 | if (self->codec->encinit != NULL &&\r | |
1669 | self->codec->encinit(&self->state, self->codec->config) != 0)\r | |
1670 | goto errorexit;\r | |
1671 | \r | |
1672 | Py_DECREF(codec);\r | |
1673 | return (PyObject *)self;\r | |
1674 | \r | |
1675 | errorexit:\r | |
1676 | Py_XDECREF(self);\r | |
1677 | Py_XDECREF(codec);\r | |
1678 | return NULL;\r | |
1679 | }\r | |
1680 | \r | |
/*
 * tp_init for MultibyteStreamWriter: intentionally a no-op that always
 * reports success.  All set-up happens in mbstreamwriter_new().
 */
static int
mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1686 | \r | |
1687 | static int\r | |
1688 | mbstreamwriter_traverse(MultibyteStreamWriterObject *self,\r | |
1689 | visitproc visit, void *arg)\r | |
1690 | {\r | |
1691 | if (ERROR_ISCUSTOM(self->errors))\r | |
1692 | Py_VISIT(self->errors);\r | |
1693 | Py_VISIT(self->stream);\r | |
1694 | return 0;\r | |
1695 | }\r | |
1696 | \r | |
/*
 * tp_dealloc for MultibyteStreamWriter: stop GC tracking, release the
 * error handler via ERROR_DECREF and the wrapped stream, then free the
 * object's memory through the type's tp_free slot.
 */
static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    /* XDECREF: stream is still NULL if construction failed early. */
    Py_XDECREF(self->stream);
    Py_TYPE(self)->tp_free(self);
}
1705 | \r | |
/* Method table of MultibyteStreamWriter. */
static struct PyMethodDef mbstreamwriter_methods[] = {
    /* write and writelines each take exactly one object argument */
    {"write",           (PyCFunction)mbstreamwriter_write,
                    METH_O, NULL},
    {"writelines",      (PyCFunction)mbstreamwriter_writelines,
                    METH_O, NULL},
    /* reset takes no arguments */
    {"reset",           (PyCFunction)mbstreamwriter_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
1715 | \r | |
/* Member table of MultibyteStreamWriter: exposes the wrapped stream
 * object as the read-only attribute "stream". */
static PyMemberDef mbstreamwriter_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamWriterObject, stream),
                    READONLY, NULL},
    {NULL,}     /* sentinel */
};
1722 | \r | |
/*
 * Type object for MultibyteStreamWriter.  The type takes part in
 * garbage collection and may be subclassed (see tp_flags); slots left
 * at 0 are not provided here.
 */
static PyTypeObject MultibyteStreamWriter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteStreamWriter",            /* tp_name */
    sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbstreamwriter_traverse,      /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbstreamwriter_methods,             /* tp_methods */
    mbstreamwriter_members,             /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbstreamwriter_init,                /* tp_init */
    0,                                  /* tp_alloc */
    mbstreamwriter_new,                 /* tp_new */
};
1765 | \r | |
1766 | \r | |
1767 | /**\r | |
1768 | * Exposed factory function\r | |
1769 | */\r | |
1770 | \r | |
1771 | static PyObject *\r | |
1772 | __create_codec(PyObject *ignore, PyObject *arg)\r | |
1773 | {\r | |
1774 | MultibyteCodecObject *self;\r | |
1775 | MultibyteCodec *codec;\r | |
1776 | \r | |
1777 | if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {\r | |
1778 | PyErr_SetString(PyExc_ValueError, "argument type invalid");\r | |
1779 | return NULL;\r | |
1780 | }\r | |
1781 | \r | |
1782 | codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);\r | |
1783 | if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)\r | |
1784 | return NULL;\r | |
1785 | \r | |
1786 | self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);\r | |
1787 | if (self == NULL)\r | |
1788 | return NULL;\r | |
1789 | self->codec = codec;\r | |
1790 | \r | |
1791 | return (PyObject *)self;\r | |
1792 | }\r | |
1793 | \r | |
/* Module-level function table: only the codec factory, which takes a
 * single object argument. */
static struct PyMethodDef __methods[] = {
    {"__create_codec", (PyCFunction)__create_codec, METH_O},
    {NULL, NULL},       /* sentinel */
};
1798 | \r | |
1799 | PyMODINIT_FUNC\r | |
1800 | init_multibytecodec(void)\r | |
1801 | {\r | |
1802 | int i;\r | |
1803 | PyObject *m;\r | |
1804 | PyTypeObject *typelist[] = {\r | |
1805 | &MultibyteIncrementalEncoder_Type,\r | |
1806 | &MultibyteIncrementalDecoder_Type,\r | |
1807 | &MultibyteStreamReader_Type,\r | |
1808 | &MultibyteStreamWriter_Type,\r | |
1809 | NULL\r | |
1810 | };\r | |
1811 | \r | |
1812 | if (PyType_Ready(&MultibyteCodec_Type) < 0)\r | |
1813 | return;\r | |
1814 | \r | |
1815 | m = Py_InitModule("_multibytecodec", __methods);\r | |
1816 | if (m == NULL)\r | |
1817 | return;\r | |
1818 | \r | |
1819 | for (i = 0; typelist[i] != NULL; i++) {\r | |
1820 | if (PyType_Ready(typelist[i]) < 0)\r | |
1821 | return;\r | |
1822 | Py_INCREF(typelist[i]);\r | |
1823 | PyModule_AddObject(m, typelist[i]->tp_name,\r | |
1824 | (PyObject *)typelist[i]);\r | |
1825 | }\r | |
1826 | \r | |
1827 | if (PyErr_Occurred())\r | |
1828 | Py_FatalError("can't initialize the _multibytecodec module");\r | |
1829 | }\r |