]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | /*\r |
2 | * cjkcodecs.h: common header for cjkcodecs\r | |
3 | *\r | |
4 | * Written by Hye-Shik Chang <perky@FreeBSD.org>\r | |
5 | */\r | |
6 | \r | |
7 | #ifndef _CJKCODECS_H_\r | |
8 | #define _CJKCODECS_H_\r | |
9 | \r | |
10 | #define PY_SSIZE_T_CLEAN\r | |
11 | #include "Python.h"\r | |
12 | #include "multibytecodec.h"\r | |
13 | \r | |
14 | \r | |
/* Unicode side: marks an "undefined" code point in decode tables. */
#define UNIINV  0xFFFE

/*
 * DBCS side: internal sentinels that no charset uses as a real code.
 * Note that MULTIC has the same numeric value as UNIINV; the two live in
 * different tables (DBCS values vs. Unicode values).
 */
#define NOCHAR  0xFFFF
#define MULTIC  0xFFFE
#define DBCINV  0xFFFD

/* One-letter aliases that keep the generated mapping tables compact. */
#define U   UNIINV
#define N   NOCHAR
#define M   MULTIC
#define D   DBCINV
28 | \r | |
29 | struct dbcs_index {\r | |
30 | const ucs2_t *map;\r | |
31 | unsigned char bottom, top;\r | |
32 | };\r | |
33 | typedef struct dbcs_index decode_map;\r | |
34 | \r | |
35 | struct widedbcs_index {\r | |
36 | const ucs4_t *map;\r | |
37 | unsigned char bottom, top;\r | |
38 | };\r | |
39 | typedef struct widedbcs_index widedecode_map;\r | |
40 | \r | |
41 | struct unim_index {\r | |
42 | const DBCHAR *map;\r | |
43 | unsigned char bottom, top;\r | |
44 | };\r | |
45 | typedef struct unim_index encode_map;\r | |
46 | \r | |
/*
 * Encoding-table row whose entries are raw bytes.  _TRYMAP_ENC_MPLANE reads
 * three consecutive bytes per entry (offsets *3, *3 + 1, *3 + 2).
 */
struct unim_index_bytebased {
    const unsigned char *map;
    unsigned char bottom;
    unsigned char top;
};
51 | \r | |
/*
 * A named pair of mapping tables.  Either table pointer may be NULL (see
 * MAPPING_ENCONLY / MAPPING_DECONLY).  An entry whose `charset` is the
 * empty string terminates a list of these.
 */
struct dbcs_map {
    const char               *charset;  /* name; exported as "__map_<charset>" */
    const struct unim_index  *encmap;   /* Unicode -> DBCS rows, or NULL */
    const struct dbcs_index  *decmap;   /* DBCS -> Unicode rows, or NULL */
};
57 | \r | |
58 | struct pair_encodemap {\r | |
59 | ucs4_t uniseq;\r | |
60 | DBCHAR code;\r | |
61 | };\r | |
62 | \r | |
63 | static const MultibyteCodec *codec_list;\r | |
64 | static const struct dbcs_map *mapping_list;\r | |
65 | \r | |
/*
 * Signature helpers.  Each macro expands to the declarator of a static
 * function named <encoding>_<role>; it carries no body and no trailing
 * semicolon, so it is meant to be followed directly by one of those.
 */

/* static int <encoding>_codec_init(config) */
#define CODEC_INIT(encoding)                                            \
    static int encoding##_codec_init(const void *config)

/* static int <encoding>_encode_init(state, config) */
#define ENCODER_INIT(encoding)                                          \
    static int encoding##_encode_init(                                  \
        MultibyteCodec_State *state, const void *config)

/* static Py_ssize_t <encoding>_encode(state, config, inbuf, inleft,
 *                                     outbuf, outleft, flags) */
#define ENCODER(encoding)                                               \
    static Py_ssize_t encoding##_encode(                                \
        MultibyteCodec_State *state, const void *config,                \
        const Py_UNICODE **inbuf, Py_ssize_t inleft,                    \
        unsigned char **outbuf, Py_ssize_t outleft, int flags)

/* static Py_ssize_t <encoding>_encode_reset(state, config, outbuf, outleft) */
#define ENCODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_encode_reset(                          \
        MultibyteCodec_State *state, const void *config,                \
        unsigned char **outbuf, Py_ssize_t outleft)

/* static int <encoding>_decode_init(state, config) */
#define DECODER_INIT(encoding)                                          \
    static int encoding##_decode_init(                                  \
        MultibyteCodec_State *state, const void *config)

/* static Py_ssize_t <encoding>_decode(state, config, inbuf, inleft,
 *                                     outbuf, outleft) */
#define DECODER(encoding)                                               \
    static Py_ssize_t encoding##_decode(                                \
        MultibyteCodec_State *state, const void *config,                \
        const unsigned char **inbuf, Py_ssize_t inleft,                 \
        Py_UNICODE **outbuf, Py_ssize_t outleft)

/* static Py_ssize_t <encoding>_decode_reset(state, config) */
#define DECODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_decode_reset(                          \
        MultibyteCodec_State *state, const void *config)
93 | \r | |
/*
 * UCS4INVALID(code): when Py_UNICODE is 4 bytes wide, make the enclosing
 * function return 1 if `code` is above 0xFFFF.  In other builds it expands
 * to an empty `if (0) ;` statement.  The expansion already ends in ';'.
 */
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) if ((code) > 0xFFFF) return 1;
#else
#define UCS4INVALID(code) if (0) ;
#endif
102 | \r | |
/*
 * Cursor helpers.  They operate on the `inbuf`/`inleft`/`outbuf`/`outleft`
 * variables of the enclosing function.  Every expansion is a sequence of
 * complete statements that already ends in ';', so uses are written
 * without a trailing semicolon and must not be the unbraced body of an
 * if/else/loop.
 */

/* Consume i input units. */
#define NEXT_IN(i)                  \
    (*inbuf) += (i);                \
    (inleft) -= (i);

/* Commit o output units. */
#define NEXT_OUT(o)                 \
    (*outbuf) += (o);               \
    (outleft) -= (o);

/* Consume i input units and commit o output units. */
#define NEXT(i, o)                  \
    NEXT_IN(i) NEXT_OUT(o)

/* Return MBERR_TOOFEW from the enclosing function if fewer than n input
 * units remain. */
#define REQUIRE_INBUF(n)            \
    if (inleft < (n))               \
        return MBERR_TOOFEW;

/* Return MBERR_TOOSMALL from the enclosing function if fewer than n output
 * units remain. */
#define REQUIRE_OUTBUF(n)           \
    if (outleft < (n))              \
        return MBERR_TOOSMALL;
118 | \r | |
/* Peek at the next four input units without consuming them. */
#define IN1     ((*inbuf)[0])
#define IN2     ((*inbuf)[1])
#define IN3     ((*inbuf)[2])
#define IN4     ((*inbuf)[3])

/* Store into one of the next four output slots; no space check and no
 * cursor advance.  The expansion already ends in ';'. */
#define OUT1(c)     ((*outbuf)[0]) = (c);
#define OUT2(c)     ((*outbuf)[1]) = (c);
#define OUT3(c)     ((*outbuf)[2]) = (c);
#define OUT4(c)     ((*outbuf)[3]) = (c);

/*
 * WRITEn: check that n output slots are available (returning
 * MBERR_TOOSMALL from the enclosing function otherwise), then store n
 * values.  The output cursor is NOT advanced.
 */
#define WRITE1(c1)                      \
    REQUIRE_OUTBUF(1)                   \
    (*outbuf)[0] = (c1);

#define WRITE2(c1, c2)                  \
    REQUIRE_OUTBUF(2)                   \
    (*outbuf)[0] = (c1);                \
    (*outbuf)[1] = (c2);

#define WRITE3(c1, c2, c3)              \
    REQUIRE_OUTBUF(3)                   \
    (*outbuf)[0] = (c1);                \
    (*outbuf)[1] = (c2);                \
    (*outbuf)[2] = (c3);

#define WRITE4(c1, c2, c3, c4)          \
    REQUIRE_OUTBUF(4)                   \
    (*outbuf)[0] = (c1);                \
    (*outbuf)[1] = (c2);                \
    (*outbuf)[2] = (c3);                \
    (*outbuf)[3] = (c4);
147 | \r | |
/*
 * WRITEUCS4(c): emit the code point `c` and advance the output cursor.
 * With a 2-byte Py_UNICODE, `c` is split into a high/low surrogate pair
 * (two output units); otherwise it is stored as a single unit.  Returns
 * MBERR_TOOSMALL from the enclosing function when the output is full.
 */
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c)                                           \
    REQUIRE_OUTBUF(2)                                           \
    (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);            \
    (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);          \
    NEXT_OUT(2)
#else
# define WRITEUCS4(c)                                           \
    REQUIRE_OUTBUF(1)                                           \
    **outbuf = (Py_UNICODE)(c);                                 \
    NEXT_OUT(1)
#endif
160 | \r | |
/*
 * Table lookups.  The _TRYMAP_* forms are expressions that are true when
 * row `m` has a table, `val` lies in [bottom, top], and the entry is not
 * the "unmapped" sentinel; as a side effect the entry is stored in `assi`.
 * The TRYMAP_* forms prepend `if`, so the caller supplies the statement
 * that runs on a successful lookup.
 */

/* Encoding row lookup; NOCHAR marks an unmapped entry. */
#define _TRYMAP_ENC(m, assi, val)                                       \
    ((m)->map != NULL &&                                                \
     (val) >= (m)->bottom && (val)<= (m)->top &&                        \
     ((assi) = (m)->map[(val) - (m)->bottom]) != NOCHAR)

/* Row is selected by the high byte of `uni`, entry by its low byte. */
#define TRYMAP_ENC_COND(charset, assi, uni)                             \
    _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)

#define TRYMAP_ENC(charset, assi, uni)                                  \
    if TRYMAP_ENC_COND(charset, assi, uni)

/* Decoding row lookup; UNIINV marks an unmapped entry. */
#define _TRYMAP_DEC(m, assi, val)                                       \
    ((m)->map != NULL &&                                                \
     (val) >= (m)->bottom && (val)<= (m)->top &&                        \
     ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)

/* Row is selected by c1, entry by c2. */
#define TRYMAP_DEC(charset, assi, c1, c2)                               \
    if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)

/*
 * Byte-based encoding lookup with three bytes per entry.  The first byte
 * goes to `assplane` and must be non-zero for the lookup to succeed; the
 * second and third are then stored in `asshi` and `asslo` (the `, 1`
 * keeps the && chain true regardless of the stored value).
 */
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)              \
    ((m)->map != NULL &&                                                \
     (val) >= (m)->bottom && (val)<= (m)->top &&                        \
     ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 &&           \
     (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) &&          \
     (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))

#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni)         \
    if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8],                \
                          assplane, asshi, asslo, (uni) & 0xff)

/* Decoding lookup in a table that has one more leading dimension, `plane`. */
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2)                 \
    if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
188 | \r | |
/*
 * DECODE_SURROGATE(c): with a 2-byte Py_UNICODE, if `c` is a high
 * surrogate and the following input unit (IN2) is a low surrogate, replace
 * `c` with the combined code point.  Returns MBERR_TOOFEW from the
 * enclosing function when `c` is a high surrogate but fewer than two input
 * units remain.  A high surrogate not followed by a low surrogate leaves
 * `c` unchanged.  `c` must be a modifiable lvalue and is evaluated more
 * than once.  In other builds the macro is an empty block.
 *
 * GET_INSIZE(c): number of input units `c` was read from (2 for a value
 * above 0xffff in 2-byte builds, otherwise 1).
 *
 * The parameter is parenthesized at every use so that an argument that is
 * not a plain identifier still parses as intended.
 */
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c)                                             \
    if ((c) >> 10 == 0xd800 >> 10) { /* high surrogate */               \
        REQUIRE_INBUF(2)                                                \
        if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */            \
            (c) = 0x10000 + ((ucs4_t)((c) - 0xd800) << 10) +            \
                ((ucs4_t)(IN2) - 0xdc00);                               \
        }                                                               \
    }
#define GET_INSIZE(c)   ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c)   1
#endif
203 | \r | |
/*
 * Builders for the table of exported mappings:
 *
 *     BEGIN_MAPPINGS_LIST
 *       MAPPING_ENCDEC(foo)      -- uses foo_encmap and foo_decmap
 *       MAPPING_ENCONLY(bar)     -- uses bar_encmap only
 *       MAPPING_DECONLY(baz)     -- uses baz_decmap only
 *     END_MAPPINGS_LIST
 *
 * END_MAPPINGS_LIST appends the ""-named terminator entry and points
 * `mapping_list` at the array.
 */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST                                       \
    {"", NULL, NULL} };                                         \
    static const struct dbcs_map *mapping_list =                \
        (const struct dbcs_map *)_mapping_list;
212 | \r | |
/*
 * Builders for the table of codecs, used between BEGIN_CODECS_LIST and
 * END_CODECS_LIST.  Each CODEC_* macro emits one MultibyteCodec
 * initializer: the encoding name, two slots that are NULL except for the
 * codec-init function in the _WINIT variant, then six method slots.
 * END_CODECS_LIST appends the ""-named terminator entry and points
 * `codec_list` at the array.
 */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {

/* All six hooks: encode, encode_init, encode_reset, decode, decode_init,
 * decode_reset. */
#define _STATEFUL_METHODS(enc)          \
    enc##_encode,                       \
    enc##_encode_init,                  \
    enc##_encode_reset,                 \
    enc##_decode,                       \
    enc##_decode_init,                  \
    enc##_decode_reset,

/* Only encode/decode; the init and reset slots are NULL. */
#define _STATELESS_METHODS(enc)         \
    enc##_encode, NULL, NULL,           \
    enc##_decode, NULL, NULL,

#define CODEC_STATEFUL(enc) {           \
    #enc, NULL, NULL,                   \
    _STATEFUL_METHODS(enc)              \
},

#define CODEC_STATELESS(enc) {          \
    #enc, NULL, NULL,                   \
    _STATELESS_METHODS(enc)             \
},

/* Stateless codec that additionally registers <enc>_codec_init. */
#define CODEC_STATELESS_WINIT(enc) {    \
    #enc, NULL,                         \
    enc##_codec_init,                   \
    _STATELESS_METHODS(enc)             \
},

#define END_CODECS_LIST                                         \
    {"", NULL,} };                                              \
    static const MultibyteCodec *codec_list =                   \
        (const MultibyteCodec *)_codec_list;
241 | \r | |
242 | static PyObject *\r | |
243 | getmultibytecodec(void)\r | |
244 | {\r | |
245 | static PyObject *cofunc = NULL;\r | |
246 | \r | |
247 | if (cofunc == NULL) {\r | |
248 | PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");\r | |
249 | if (mod == NULL)\r | |
250 | return NULL;\r | |
251 | cofunc = PyObject_GetAttrString(mod, "__create_codec");\r | |
252 | Py_DECREF(mod);\r | |
253 | }\r | |
254 | return cofunc;\r | |
255 | }\r | |
256 | \r | |
257 | static PyObject *\r | |
258 | getcodec(PyObject *self, PyObject *encoding)\r | |
259 | {\r | |
260 | PyObject *codecobj, *r, *cofunc;\r | |
261 | const MultibyteCodec *codec;\r | |
262 | const char *enc;\r | |
263 | \r | |
264 | if (!PyString_Check(encoding)) {\r | |
265 | PyErr_SetString(PyExc_TypeError,\r | |
266 | "encoding name must be a string.");\r | |
267 | return NULL;\r | |
268 | }\r | |
269 | \r | |
270 | cofunc = getmultibytecodec();\r | |
271 | if (cofunc == NULL)\r | |
272 | return NULL;\r | |
273 | \r | |
274 | enc = PyString_AS_STRING(encoding);\r | |
275 | for (codec = codec_list; codec->encoding[0]; codec++)\r | |
276 | if (strcmp(codec->encoding, enc) == 0)\r | |
277 | break;\r | |
278 | \r | |
279 | if (codec->encoding[0] == '\0') {\r | |
280 | PyErr_SetString(PyExc_LookupError,\r | |
281 | "no such codec is supported.");\r | |
282 | return NULL;\r | |
283 | }\r | |
284 | \r | |
285 | codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);\r | |
286 | if (codecobj == NULL)\r | |
287 | return NULL;\r | |
288 | \r | |
289 | r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL);\r | |
290 | Py_DECREF(codecobj);\r | |
291 | \r | |
292 | return r;\r | |
293 | }\r | |
294 | \r | |
295 | static struct PyMethodDef __methods[] = {\r | |
296 | {"getcodec", (PyCFunction)getcodec, METH_O, ""},\r | |
297 | {NULL, NULL},\r | |
298 | };\r | |
299 | \r | |
300 | static int\r | |
301 | register_maps(PyObject *module)\r | |
302 | {\r | |
303 | const struct dbcs_map *h;\r | |
304 | \r | |
305 | for (h = mapping_list; h->charset[0] != '\0'; h++) {\r | |
306 | char mhname[256] = "__map_";\r | |
307 | int r;\r | |
308 | strcpy(mhname + sizeof("__map_") - 1, h->charset);\r | |
309 | r = PyModule_AddObject(module, mhname,\r | |
310 | PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));\r | |
311 | if (r == -1)\r | |
312 | return -1;\r | |
313 | }\r | |
314 | return 0;\r | |
315 | }\r | |
316 | \r | |
#ifdef USING_BINARY_PAIR_SEARCH
/*
 * Binary-search `haystack` (haystacksize entries, which the search assumes
 * are sorted by ascending `uniseq`) for the key formed from `body` in the
 * high 16 bits and `modifier` in the low 16 bits.
 *
 * Returns the matching entry's `code`, or DBCINV when there is no match or
 * the table is empty.
 *
 * Changes from the previous version:
 *  - `body` is widened to ucs4_t before the shift; shifting the promoted
 *    (signed int) value left by 16 is undefined for body >= 0x8000.
 *    NOTE(review): this assumes ucs4_t is an unsigned type of at least 32
 *    bits -- confirm against multibytecodec.h.
 *  - an empty table returns DBCINV instead of reading haystack[0].
 */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
                const struct pair_encodemap *haystack, int haystacksize)
{
    int pos, min, max;
    ucs4_t value = ((ucs4_t)body << 16) | modifier;

    if (haystacksize <= 0)
        return DBCINV;

    min = 0;
    max = haystacksize;

    /* Invariant: min <= pos < max while the loop runs, so every index
     * used here and in the final comparison is inside the table. */
    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
        if (value < haystack[pos].uniseq) {
            if (max == pos)
                break;
            max = pos;
        }
        else if (value > haystack[pos].uniseq) {
            if (min == pos)
                break;
            min = pos;
        }
        else
            break;
    }

    if (value == haystack[pos].uniseq)
        return haystack[pos].code;
    return DBCINV;
}
#endif
346 | \r | |
#ifdef USING_IMPORTED_MAPS
/* Fetch the "__map_<charset>" capsule from module "_codecs_<locale>" and
 * store its tables through `encmap` / `decmap` (either may be NULL). */
#define IMPORT_MAP(locale, charset, encmap, decmap)                     \
    importmap("_codecs_" #locale, "__map_" #charset,                    \
              (const void**)encmap, (const void**)decmap)

/*
 * Import `modname`, read its attribute `symbol`, and, if that is a capsule
 * named PyMultibyteCodec_CAPSULE_NAME, copy the wrapped dbcs_map's table
 * pointers into *encmap and *decmap (each skipped when the out-pointer is
 * NULL).
 *
 * Returns 0 on success, -1 with an exception set on failure (ValueError
 * when the attribute is not a valid capsule).
 *
 * Change from the previous version: the attribute object is now released
 * on the "not a capsule" error path as well; it used to leak there.
 */
static int
importmap(const char *modname, const char *symbol,
          const void **encmap, const void **decmap)
{
    PyObject *mod, *o = NULL;
    const struct dbcs_map *map;
    int rc = -1;

    mod = PyImport_ImportModule((char *)modname);
    if (mod == NULL)
        return -1;

    o = PyObject_GetAttrString(mod, (char*)symbol);
    if (o == NULL)
        goto done;

    if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
        PyErr_SetString(PyExc_ValueError,
                        "map data must be a Capsule.");
        goto done;
    }

    map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
    if (encmap != NULL)
        *encmap = map->encmap;
    if (decmap != NULL)
        *decmap = map->decmap;
    rc = 0;

done:
    /* Single exit: drop both references whether or not the lookup worked. */
    Py_XDECREF(o);
    Py_DECREF(mod);
    return rc;
}
#endif
388 | \r | |
/*
 * Define the module init function init_codecs_<loc>(): create the module
 * "_codecs_<loc>" with the method table above and, if that succeeded,
 * register the mapping capsules on it.  The result of register_maps() is
 * explicitly discarded.
 */
#define I_AM_A_MODULE_FOR(loc)                                          \
    void                                                                \
    init_codecs_##loc(void)                                             \
    {                                                                   \
        PyObject *m = Py_InitModule("_codecs_" #loc, __methods);        \
        if (m != NULL)                                                  \
            (void)register_maps(m);                                     \
    }
397 | \r | |
398 | #endif\r |