]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Modules/cjkcodecs/_codecs_iso2022.c
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Modules / cjkcodecs / _codecs_iso2022.c
CommitLineData
7eb75bcc
DM
1/*\r
2 * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.\r
3 *\r
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>\r
5 */\r
6\r
7#define USING_IMPORTED_MAPS\r
8#define USING_BINARY_PAIR_SEARCH\r
9#define EXTERN_JISX0213_PAIR\r
10#define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE\r
11#define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE\r
12\r
13#include "cjkcodecs.h"\r
14#include "alg_jisx0201.h"\r
15#include "emu_jisx0213_2000.h"\r
16#include "mappings_jisx0213_pair.h"\r
17\r
/* STATE

   state->c[0-3]

        00000000
        ||^^^^^|
        |+-----+----  G0-3 Character Set
        +-----------  Is G0-3 double byte? (CHARSET_DBCS, 0x80)

   state->c[4]

        00000000
              ||
              |+----  Locked-Shift?  (F_SHIFTED, 0x01)
              +-----  ESC Throughout (F_ESCTHROUGHOUT, 0x02)
*/
34\r
/* Control bytes that the encoder and decoder below treat specially. */
#define ESC                     0x1B
#define SO                      0x0E
#define SI                      0x0F
#define LF                      0x0A

/* Upper bound on the bytes scanned when looking for the end of an
 * escape sequence. */
#define MAX_ESCSEQLEN           16

/* Bit marking a charset as double-byte, and the inverse operation that
 * recovers the final byte of the designation escape from a mark. */
#define CHARSET_DBCS            0x80
#define ESCMARK(mark)           ((mark) & 0x7f)

/* Single-byte charsets: the mark is the final byte of the escape. */
#define CHARSET_ISO8859_1       'A'
#define CHARSET_ASCII           'B'
#define CHARSET_ISO8859_7       'F'
#define CHARSET_JISX0201_K      'I'
#define CHARSET_JISX0201_R      'J'

/* Double-byte charsets: final byte of the escape with CHARSET_DBCS set. */
#define CHARSET_JISX0208_O      ('@'|CHARSET_DBCS)
#define CHARSET_GB2312          ('A'|CHARSET_DBCS)
#define CHARSET_JISX0208        ('B'|CHARSET_DBCS)
#define CHARSET_KSX1001         ('C'|CHARSET_DBCS)
#define CHARSET_JISX0212        ('D'|CHARSET_DBCS)
#define CHARSET_GB2312_8565     ('E'|CHARSET_DBCS)
#define CHARSET_CNS11643_1      ('G'|CHARSET_DBCS)
#define CHARSET_CNS11643_2      ('H'|CHARSET_DBCS)
#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
#define CHARSET_JISX0213_2      ('P'|CHARSET_DBCS)
#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
62\r
/* True when byte `c` terminates an escape sequence: '@' or 'A'..'Z'.
 * Note that `c` is evaluated more than once. */
#define IS_ESCEND(c)    ((c) == '@' || ((c) >= 'A' && (c) <= 'Z'))

/* True when `c2`, the byte following ESC, starts an escape sequence this
 * codec parses itself.  This is not a complete list of ISO-2022 escape
 * sequence headers, but it is enough to implement the CJK instances of
 * iso-2022. */
#define IS_ISO2022ESC(c2)                               \
    ((c2) == '$' || (c2) == '(' || (c2) == ')' ||       \
     (c2) == '.' || (c2) == '&')
69\r
/* Sentinel results returned by the per-charset encode/decode callbacks. */
#define MAP_UNMAPPABLE          0xFFFF
#define MAP_MULTIPLE_AVAIL      0xFFFE /* for JIS X 0213 */

/* Flag bits kept in state->c[4]. */
#define F_SHIFTED               0x01
#define F_ESCTHROUGHOUT         0x02

/* Accessors for the charset designated to G<dn>, stored in state->c[dn].
 * The setter macros carry their own trailing ';' because the call sites
 * in this file are written without one. */
#define STATE_GETG(dn)          ((state)->c[dn])
#define STATE_SETG(dn, v)       ((state)->c[dn]) = (v);

#define STATE_G0                STATE_GETG(0)
#define STATE_G1                STATE_GETG(1)
#define STATE_G2                STATE_GETG(2)
#define STATE_G3                STATE_GETG(3)

#define STATE_SETG0(v)          STATE_SETG(0, v)
#define STATE_SETG1(v)          STATE_SETG(1, v)
#define STATE_SETG2(v)          STATE_SETG(2, v)
#define STATE_SETG3(v)          STATE_SETG(3, v)

/* Flag helpers operating on state->c[4]; the modifying forms also end
 * in ';'. */
#define STATE_GETFLAG(f)        ((state)->c[4] & (f))
#define STATE_SETFLAG(f)        ((state)->c[4]) |= (f);
#define STATE_CLEARFLAG(f)      ((state)->c[4]) &= ~(f);
#define STATE_CLEARFLAGS()      ((state)->c[4]) = 0;
92\r
/* View the opaque `config` pointer that is in scope as the per-codec
 * configuration record, and read its two fields. */
#define ISO2022_CONFIG          ((const struct iso2022_config *)config)
#define CONFIG_DESIGNATIONS     (ISO2022_CONFIG->designations)
#define CONFIG_ISSET(flag)      (ISO2022_CONFIG->flags & (flag))

/* Bits for iso2022_config.flags */
#define NO_SHIFT                0x01
#define USE_G2                  0x02
#define USE_JISX0208_EXT        0x04
101\r
102/*-*- internal data structures -*-*/\r
103\r
104typedef int (*iso2022_init_func)(void);\r
105typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);\r
106typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);\r
107\r
108struct iso2022_designation {\r
109 unsigned char mark;\r
110 unsigned char plane;\r
111 unsigned char width;\r
112 iso2022_init_func initializer;\r
113 iso2022_decode_func decoder;\r
114 iso2022_encode_func encoder;\r
115};\r
116\r
117struct iso2022_config {\r
118 int flags;\r
119 const struct iso2022_designation *designations; /* non-ascii desigs */\r
120};\r
121\r
122/*-*- iso-2022 codec implementation -*-*/\r
123\r
124CODEC_INIT(iso2022)\r
125{\r
126 const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;\r
127 for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)\r
128 if (desig->initializer != NULL && desig->initializer() != 0)\r
129 return -1;\r
130 return 0;\r
131}\r
132\r
133ENCODER_INIT(iso2022)\r
134{\r
135 STATE_CLEARFLAGS()\r
136 STATE_SETG0(CHARSET_ASCII)\r
137 STATE_SETG1(CHARSET_ASCII)\r
138 return 0;\r
139}\r
140\r
141ENCODER_RESET(iso2022)\r
142{\r
143 if (STATE_GETFLAG(F_SHIFTED)) {\r
144 WRITE1(SI)\r
145 NEXT_OUT(1)\r
146 STATE_CLEARFLAG(F_SHIFTED)\r
147 }\r
148 if (STATE_G0 != CHARSET_ASCII) {\r
149 WRITE3(ESC, '(', 'B')\r
150 NEXT_OUT(3)\r
151 STATE_SETG0(CHARSET_ASCII)\r
152 }\r
153 return 0;\r
154}\r
155\r
156ENCODER(iso2022)\r
157{\r
158 while (inleft > 0) {\r
159 const struct iso2022_designation *dsg;\r
160 DBCHAR encoded;\r
161 ucs4_t c = **inbuf;\r
162 Py_ssize_t insize;\r
163\r
164 if (c < 0x80) {\r
165 if (STATE_G0 != CHARSET_ASCII) {\r
166 WRITE3(ESC, '(', 'B')\r
167 STATE_SETG0(CHARSET_ASCII)\r
168 NEXT_OUT(3)\r
169 }\r
170 if (STATE_GETFLAG(F_SHIFTED)) {\r
171 WRITE1(SI)\r
172 STATE_CLEARFLAG(F_SHIFTED)\r
173 NEXT_OUT(1)\r
174 }\r
175 WRITE1((unsigned char)c)\r
176 NEXT(1, 1)\r
177 continue;\r
178 }\r
179\r
180 DECODE_SURROGATE(c)\r
181 insize = GET_INSIZE(c);\r
182\r
183 encoded = MAP_UNMAPPABLE;\r
184 for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {\r
185 Py_ssize_t length = 1;\r
186 encoded = dsg->encoder(&c, &length);\r
187 if (encoded == MAP_MULTIPLE_AVAIL) {\r
188 /* this implementation won't work for pair\r
189 * of non-bmp characters. */\r
190 if (inleft < 2) {\r
191 if (!(flags & MBENC_FLUSH))\r
192 return MBERR_TOOFEW;\r
193 length = -1;\r
194 }\r
195 else\r
196 length = 2;\r
197#if Py_UNICODE_SIZE == 2\r
198 if (length == 2) {\r
199 ucs4_t u4in[2];\r
200 u4in[0] = (ucs4_t)IN1;\r
201 u4in[1] = (ucs4_t)IN2;\r
202 encoded = dsg->encoder(u4in, &length);\r
203 } else\r
204 encoded = dsg->encoder(&c, &length);\r
205#else\r
206 encoded = dsg->encoder(&c, &length);\r
207#endif\r
208 if (encoded != MAP_UNMAPPABLE) {\r
209 insize = length;\r
210 break;\r
211 }\r
212 }\r
213 else if (encoded != MAP_UNMAPPABLE)\r
214 break;\r
215 }\r
216\r
217 if (!dsg->mark)\r
218 return 1;\r
219 assert(dsg->width == 1 || dsg->width == 2);\r
220\r
221 switch (dsg->plane) {\r
222 case 0: /* G0 */\r
223 if (STATE_GETFLAG(F_SHIFTED)) {\r
224 WRITE1(SI)\r
225 STATE_CLEARFLAG(F_SHIFTED)\r
226 NEXT_OUT(1)\r
227 }\r
228 if (STATE_G0 != dsg->mark) {\r
229 if (dsg->width == 1) {\r
230 WRITE3(ESC, '(', ESCMARK(dsg->mark))\r
231 STATE_SETG0(dsg->mark)\r
232 NEXT_OUT(3)\r
233 }\r
234 else if (dsg->mark == CHARSET_JISX0208) {\r
235 WRITE3(ESC, '$', ESCMARK(dsg->mark))\r
236 STATE_SETG0(dsg->mark)\r
237 NEXT_OUT(3)\r
238 }\r
239 else {\r
240 WRITE4(ESC, '$', '(',\r
241 ESCMARK(dsg->mark))\r
242 STATE_SETG0(dsg->mark)\r
243 NEXT_OUT(4)\r
244 }\r
245 }\r
246 break;\r
247 case 1: /* G1 */\r
248 if (STATE_G1 != dsg->mark) {\r
249 if (dsg->width == 1) {\r
250 WRITE3(ESC, ')', ESCMARK(dsg->mark))\r
251 STATE_SETG1(dsg->mark)\r
252 NEXT_OUT(3)\r
253 }\r
254 else {\r
255 WRITE4(ESC, '$', ')',\r
256 ESCMARK(dsg->mark))\r
257 STATE_SETG1(dsg->mark)\r
258 NEXT_OUT(4)\r
259 }\r
260 }\r
261 if (!STATE_GETFLAG(F_SHIFTED)) {\r
262 WRITE1(SO)\r
263 STATE_SETFLAG(F_SHIFTED)\r
264 NEXT_OUT(1)\r
265 }\r
266 break;\r
267 default: /* G2 and G3 is not supported: no encoding in\r
268 * CJKCodecs are using them yet */\r
269 return MBERR_INTERNAL;\r
270 }\r
271\r
272 if (dsg->width == 1) {\r
273 WRITE1((unsigned char)encoded)\r
274 NEXT_OUT(1)\r
275 }\r
276 else {\r
277 WRITE2(encoded >> 8, encoded & 0xff)\r
278 NEXT_OUT(2)\r
279 }\r
280 NEXT_IN(insize)\r
281 }\r
282\r
283 return 0;\r
284}\r
285\r
286DECODER_INIT(iso2022)\r
287{\r
288 STATE_CLEARFLAGS()\r
289 STATE_SETG0(CHARSET_ASCII)\r
290 STATE_SETG1(CHARSET_ASCII)\r
291 STATE_SETG2(CHARSET_ASCII)\r
292 return 0;\r
293}\r
294\r
295DECODER_RESET(iso2022)\r
296{\r
297 STATE_SETG0(CHARSET_ASCII)\r
298 STATE_CLEARFLAG(F_SHIFTED)\r
299 return 0;\r
300}\r
301\r
302static Py_ssize_t\r
303iso2022processesc(const void *config, MultibyteCodec_State *state,\r
304 const unsigned char **inbuf, Py_ssize_t *inleft)\r
305{\r
306 unsigned char charset, designation;\r
307 Py_ssize_t i, esclen;\r
308\r
309 for (i = 1;i < MAX_ESCSEQLEN;i++) {\r
310 if (i >= *inleft)\r
311 return MBERR_TOOFEW;\r
312 if (IS_ESCEND((*inbuf)[i])) {\r
313 esclen = i + 1;\r
314 break;\r
315 }\r
316 else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&\r
317 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')\r
318 i += 2;\r
319 }\r
320\r
321 if (i >= MAX_ESCSEQLEN)\r
322 return 1; /* unterminated escape sequence */\r
323\r
324 switch (esclen) {\r
325 case 3:\r
326 if (IN2 == '$') {\r
327 charset = IN3 | CHARSET_DBCS;\r
328 designation = 0;\r
329 }\r
330 else {\r
331 charset = IN3;\r
332 if (IN2 == '(') designation = 0;\r
333 else if (IN2 == ')') designation = 1;\r
334 else if (CONFIG_ISSET(USE_G2) && IN2 == '.')\r
335 designation = 2;\r
336 else return 3;\r
337 }\r
338 break;\r
339 case 4:\r
340 if (IN2 != '$')\r
341 return 4;\r
342\r
343 charset = IN4 | CHARSET_DBCS;\r
344 if (IN3 == '(') designation = 0;\r
345 else if (IN3 == ')') designation = 1;\r
346 else return 4;\r
347 break;\r
348 case 6: /* designation with prefix */\r
349 if (CONFIG_ISSET(USE_JISX0208_EXT) &&\r
350 (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&\r
351 (*inbuf)[5] == 'B') {\r
352 charset = 'B' | CHARSET_DBCS;\r
353 designation = 0;\r
354 }\r
355 else\r
356 return 6;\r
357 break;\r
358 default:\r
359 return esclen;\r
360 }\r
361\r
362 /* raise error when the charset is not designated for this encoding */\r
363 if (charset != CHARSET_ASCII) {\r
364 const struct iso2022_designation *dsg;\r
365\r
366 for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)\r
367 if (dsg->mark == charset)\r
368 break;\r
369 if (!dsg->mark)\r
370 return esclen;\r
371 }\r
372\r
373 STATE_SETG(designation, charset)\r
374 *inleft -= esclen;\r
375 (*inbuf) += esclen;\r
376 return 0;\r
377}\r
378\r
/* Decode ISO-8859-7 byte `c` (0x00-0xff) into `assi`.
 *
 * The expansion is an if / else-if chain with no final else; the caller
 * appends its own `else` to handle bytes that have no mapping.
 *   - below 0xa0: same value;
 *   - 0xa0-0xbf selected by the first bitmap: same value;
 *   - 0xb4-0xfe selected by the second bitmap, or >= 0xd4: 0x02d0 + c;
 *   - 0xa1, 0xa2, 0xaf: U+2018, U+2019, U+2015.
 *
 * The bitmaps are tested with unsigned long operands because the shift
 * count reaches 31, and shifting a signed 1 into the sign bit is
 * undefined where long is 32 bits wide.
 */
#define ISO8859_7_DECODE(c, assi)                                       \
    if ((c) < 0xa0) (assi) = (c);                                       \
    else if ((c) < 0xc0 && (0x288f3bc9UL & (1UL << ((c)-0xa0))))        \
        (assi) = (c);                                                   \
    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||              \
             (0xbffffd77UL & (1UL << ((c)-0xb4)))))                     \
        (assi) = 0x02d0 + (c);                                          \
    else if ((c) == 0xa1) (assi) = 0x2018;                              \
    else if ((c) == 0xa2) (assi) = 0x2019;                              \
    else if ((c) == 0xaf) (assi) = 0x2015;
389\r
390static Py_ssize_t\r
391iso2022processg2(const void *config, MultibyteCodec_State *state,\r
392 const unsigned char **inbuf, Py_ssize_t *inleft,\r
393 Py_UNICODE **outbuf, Py_ssize_t *outleft)\r
394{\r
395 /* not written to use encoder, decoder functions because only few\r
396 * encodings use G2 designations in CJKCodecs */\r
397 if (STATE_G2 == CHARSET_ISO8859_1) {\r
398 if (IN3 < 0x80)\r
399 OUT1(IN3 + 0x80)\r
400 else\r
401 return 3;\r
402 }\r
403 else if (STATE_G2 == CHARSET_ISO8859_7) {\r
404 ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)\r
405 else return 3;\r
406 }\r
407 else if (STATE_G2 == CHARSET_ASCII) {\r
408 if (IN3 & 0x80) return 3;\r
409 else **outbuf = IN3;\r
410 }\r
411 else\r
412 return MBERR_INTERNAL;\r
413\r
414 (*inbuf) += 3;\r
415 *inleft -= 3;\r
416 (*outbuf) += 1;\r
417 *outleft -= 1;\r
418 return 0;\r
419}\r
420\r
421DECODER(iso2022)\r
422{\r
423 const struct iso2022_designation *dsgcache = NULL;\r
424\r
425 while (inleft > 0) {\r
426 unsigned char c = IN1;\r
427 Py_ssize_t err;\r
428\r
429 if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {\r
430 /* ESC throughout mode:\r
431 * for non-iso2022 escape sequences */\r
432 WRITE1(c) /* assume as ISO-8859-1 */\r
433 NEXT(1, 1)\r
434 if (IS_ESCEND(c)) {\r
435 STATE_CLEARFLAG(F_ESCTHROUGHOUT)\r
436 }\r
437 continue;\r
438 }\r
439\r
440 switch (c) {\r
441 case ESC:\r
442 REQUIRE_INBUF(2)\r
443 if (IS_ISO2022ESC(IN2)) {\r
444 err = iso2022processesc(config, state,\r
445 inbuf, &inleft);\r
446 if (err != 0)\r
447 return err;\r
448 }\r
449 else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */\r
450 REQUIRE_INBUF(3)\r
451 err = iso2022processg2(config, state,\r
452 inbuf, &inleft, outbuf, &outleft);\r
453 if (err != 0)\r
454 return err;\r
455 }\r
456 else {\r
457 WRITE1(ESC)\r
458 STATE_SETFLAG(F_ESCTHROUGHOUT)\r
459 NEXT(1, 1)\r
460 }\r
461 break;\r
462 case SI:\r
463 if (CONFIG_ISSET(NO_SHIFT))\r
464 goto bypass;\r
465 STATE_CLEARFLAG(F_SHIFTED)\r
466 NEXT_IN(1)\r
467 break;\r
468 case SO:\r
469 if (CONFIG_ISSET(NO_SHIFT))\r
470 goto bypass;\r
471 STATE_SETFLAG(F_SHIFTED)\r
472 NEXT_IN(1)\r
473 break;\r
474 case LF:\r
475 STATE_CLEARFLAG(F_SHIFTED)\r
476 WRITE1(LF)\r
477 NEXT(1, 1)\r
478 break;\r
479 default:\r
480 if (c < 0x20) /* C0 */\r
481 goto bypass;\r
482 else if (c >= 0x80)\r
483 return 1;\r
484 else {\r
485 const struct iso2022_designation *dsg;\r
486 unsigned char charset;\r
487 ucs4_t decoded;\r
488\r
489 if (STATE_GETFLAG(F_SHIFTED))\r
490 charset = STATE_G1;\r
491 else\r
492 charset = STATE_G0;\r
493\r
494 if (charset == CHARSET_ASCII) {\r
495bypass: WRITE1(c)\r
496 NEXT(1, 1)\r
497 break;\r
498 }\r
499\r
500 if (dsgcache != NULL &&\r
501 dsgcache->mark == charset)\r
502 dsg = dsgcache;\r
503 else {\r
504 for (dsg = CONFIG_DESIGNATIONS;\r
505 dsg->mark != charset\r
506#ifdef Py_DEBUG\r
507 && dsg->mark != '\0'\r
508#endif\r
509 ;dsg++)\r
510 /* noop */;\r
511 assert(dsg->mark != '\0');\r
512 dsgcache = dsg;\r
513 }\r
514\r
515 REQUIRE_INBUF(dsg->width)\r
516 decoded = dsg->decoder(*inbuf);\r
517 if (decoded == MAP_UNMAPPABLE)\r
518 return dsg->width;\r
519\r
520 if (decoded < 0x10000) {\r
521 WRITE1(decoded)\r
522 NEXT_OUT(1)\r
523 }\r
524 else if (decoded < 0x30000) {\r
525 WRITEUCS4(decoded)\r
526 }\r
527 else { /* JIS X 0213 pairs */\r
528 WRITE2(decoded >> 16, decoded & 0xffff)\r
529 NEXT_OUT(2)\r
530 }\r
531 NEXT_IN(dsg->width)\r
532 }\r
533 break;\r
534 }\r
535 }\r
536 return 0;\r
537}\r
538\r
539/*-*- mapping table holders -*-*/\r
540\r
541#define ENCMAP(enc) static const encode_map *enc##_encmap = NULL;\r
542#define DECMAP(enc) static const decode_map *enc##_decmap = NULL;\r
543\r
544/* kr */\r
545ENCMAP(cp949)\r
546DECMAP(ksx1001)\r
547\r
548/* jp */\r
549ENCMAP(jisxcommon)\r
550DECMAP(jisx0208)\r
551DECMAP(jisx0212)\r
552ENCMAP(jisx0213_bmp)\r
553DECMAP(jisx0213_1_bmp)\r
554DECMAP(jisx0213_2_bmp)\r
555ENCMAP(jisx0213_emp)\r
556DECMAP(jisx0213_1_emp)\r
557DECMAP(jisx0213_2_emp)\r
558\r
559/* cn */\r
560ENCMAP(gbcommon)\r
561DECMAP(gb2312)\r
562\r
563/* tw */\r
564\r
565/*-*- mapping access functions -*-*/\r
566\r
567static int\r
568ksx1001_init(void)\r
569{\r
570 static int initialized = 0;\r
571\r
572 if (!initialized && (\r
573 IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||\r
574 IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))\r
575 return -1;\r
576 initialized = 1;\r
577 return 0;\r
578}\r
579\r
580static ucs4_t\r
581ksx1001_decoder(const unsigned char *data)\r
582{\r
583 ucs4_t u;\r
584 TRYMAP_DEC(ksx1001, u, data[0], data[1])\r
585 return u;\r
586 else\r
587 return MAP_UNMAPPABLE;\r
588}\r
589\r
590static DBCHAR\r
591ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)\r
592{\r
593 DBCHAR coded;\r
594 assert(*length == 1);\r
595 if (*data < 0x10000) {\r
596 TRYMAP_ENC(cp949, coded, *data)\r
597 if (!(coded & 0x8000))\r
598 return coded;\r
599 }\r
600 return MAP_UNMAPPABLE;\r
601}\r
602\r
603static int\r
604jisx0208_init(void)\r
605{\r
606 static int initialized = 0;\r
607\r
608 if (!initialized && (\r
609 IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||\r
610 IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))\r
611 return -1;\r
612 initialized = 1;\r
613 return 0;\r
614}\r
615\r
616static ucs4_t\r
617jisx0208_decoder(const unsigned char *data)\r
618{\r
619 ucs4_t u;\r
620 if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */\r
621 return 0xff3c;\r
622 else TRYMAP_DEC(jisx0208, u, data[0], data[1])\r
623 return u;\r
624 else\r
625 return MAP_UNMAPPABLE;\r
626}\r
627\r
628static DBCHAR\r
629jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)\r
630{\r
631 DBCHAR coded;\r
632 assert(*length == 1);\r
633 if (*data < 0x10000) {\r
634 if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */\r
635 return 0x2140;\r
636 else TRYMAP_ENC(jisxcommon, coded, *data) {\r
637 if (!(coded & 0x8000))\r
638 return coded;\r
639 }\r
640 }\r
641 return MAP_UNMAPPABLE;\r
642}\r
643\r
644static int\r
645jisx0212_init(void)\r
646{\r
647 static int initialized = 0;\r
648\r
649 if (!initialized && (\r
650 IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||\r
651 IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))\r
652 return -1;\r
653 initialized = 1;\r
654 return 0;\r
655}\r
656\r
657static ucs4_t\r
658jisx0212_decoder(const unsigned char *data)\r
659{\r
660 ucs4_t u;\r
661 TRYMAP_DEC(jisx0212, u, data[0], data[1])\r
662 return u;\r
663 else\r
664 return MAP_UNMAPPABLE;\r
665}\r
666\r
667static DBCHAR\r
668jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)\r
669{\r
670 DBCHAR coded;\r
671 assert(*length == 1);\r
672 if (*data < 0x10000) {\r
673 TRYMAP_ENC(jisxcommon, coded, *data) {\r
674 if (coded & 0x8000)\r
675 return coded & 0x7fff;\r
676 }\r
677 }\r
678 return MAP_UNMAPPABLE;\r
679}\r
680\r
681static int\r
682jisx0213_init(void)\r
683{\r
684 static int initialized = 0;\r
685\r
686 if (!initialized && (\r
687 jisx0208_init() ||\r
688 IMPORT_MAP(jp, jisx0213_bmp,\r
689 &jisx0213_bmp_encmap, NULL) ||\r
690 IMPORT_MAP(jp, jisx0213_1_bmp,\r
691 NULL, &jisx0213_1_bmp_decmap) ||\r
692 IMPORT_MAP(jp, jisx0213_2_bmp,\r
693 NULL, &jisx0213_2_bmp_decmap) ||\r
694 IMPORT_MAP(jp, jisx0213_emp,\r
695 &jisx0213_emp_encmap, NULL) ||\r
696 IMPORT_MAP(jp, jisx0213_1_emp,\r
697 NULL, &jisx0213_1_emp_decmap) ||\r
698 IMPORT_MAP(jp, jisx0213_2_emp,\r
699 NULL, &jisx0213_2_emp_decmap) ||\r
700 IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,\r
701 &jisx0213_pair_decmap)))\r
702 return -1;\r
703 initialized = 1;\r
704 return 0;\r
705}\r
706\r
707#define config ((void *)2000)\r
708static ucs4_t\r
709jisx0213_2000_1_decoder(const unsigned char *data)\r
710{\r
711 ucs4_t u;\r
712 EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])\r
713 else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */\r
714 return 0xff3c;\r
715 else TRYMAP_DEC(jisx0208, u, data[0], data[1]);\r
716 else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);\r
717 else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])\r
718 u |= 0x20000;\r
719 else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);\r
720 else\r
721 return MAP_UNMAPPABLE;\r
722 return u;\r
723}\r
724\r
725static ucs4_t\r
726jisx0213_2000_2_decoder(const unsigned char *data)\r
727{\r
728 ucs4_t u;\r
729 EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])\r
730 TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);\r
731 else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])\r
732 u |= 0x20000;\r
733 else\r
734 return MAP_UNMAPPABLE;\r
735 return u;\r
736}\r
737#undef config\r
738\r
739static ucs4_t\r
740jisx0213_2004_1_decoder(const unsigned char *data)\r
741{\r
742 ucs4_t u;\r
743 if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */\r
744 return 0xff3c;\r
745 else TRYMAP_DEC(jisx0208, u, data[0], data[1]);\r
746 else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);\r
747 else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])\r
748 u |= 0x20000;\r
749 else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);\r
750 else\r
751 return MAP_UNMAPPABLE;\r
752 return u;\r
753}\r
754\r
755static ucs4_t\r
756jisx0213_2004_2_decoder(const unsigned char *data)\r
757{\r
758 ucs4_t u;\r
759 TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);\r
760 else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])\r
761 u |= 0x20000;\r
762 else\r
763 return MAP_UNMAPPABLE;\r
764 return u;\r
765}\r
766\r
767static DBCHAR\r
768jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)\r
769{\r
770 DBCHAR coded;\r
771\r
772 switch (*length) {\r
773 case 1: /* first character */\r
774 if (*data >= 0x10000) {\r
775 if ((*data) >> 16 == 0x20000 >> 16) {\r
776 EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)\r
777 else TRYMAP_ENC(jisx0213_emp, coded,\r
778 (*data) & 0xffff)\r
779 return coded;\r
780 }\r
781 return MAP_UNMAPPABLE;\r
782 }\r
783\r
784 EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)\r
785 else TRYMAP_ENC(jisx0213_bmp, coded, *data) {\r
786 if (coded == MULTIC)\r
787 return MAP_MULTIPLE_AVAIL;\r
788 }\r
789 else TRYMAP_ENC(jisxcommon, coded, *data) {\r
790 if (coded & 0x8000)\r
791 return MAP_UNMAPPABLE;\r
792 }\r
793 else\r
794 return MAP_UNMAPPABLE;\r
795 return coded;\r
796 case 2: /* second character of unicode pair */\r
797 coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],\r
798 jisx0213_pair_encmap, JISX0213_ENCPAIRS);\r
799 if (coded == DBCINV) {\r
800 *length = 1;\r
801 coded = find_pairencmap((ucs2_t)data[0], 0,\r
802 jisx0213_pair_encmap, JISX0213_ENCPAIRS);\r
803 if (coded == DBCINV)\r
804 return MAP_UNMAPPABLE;\r
805 }\r
806 else\r
807 return coded;\r
808 case -1: /* flush unterminated */\r
809 *length = 1;\r
810 coded = find_pairencmap((ucs2_t)data[0], 0,\r
811 jisx0213_pair_encmap, JISX0213_ENCPAIRS);\r
812 if (coded == DBCINV)\r
813 return MAP_UNMAPPABLE;\r
814 else\r
815 return coded;\r
816 default:\r
817 return MAP_UNMAPPABLE;\r
818 }\r
819}\r
820\r
821static DBCHAR\r
822jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)\r
823{\r
824 DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);\r
825 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
826 return coded;\r
827 else if (coded & 0x8000)\r
828 return MAP_UNMAPPABLE;\r
829 else\r
830 return coded;\r
831}\r
832\r
833static DBCHAR\r
834jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)\r
835{\r
836 DBCHAR coded;\r
837 Py_ssize_t ilength = *length;\r
838\r
839 coded = jisx0213_encoder(data, length, (void *)2000);\r
840 switch (ilength) {\r
841 case 1:\r
842 if (coded == MAP_MULTIPLE_AVAIL)\r
843 return MAP_MULTIPLE_AVAIL;\r
844 else\r
845 return MAP_UNMAPPABLE;\r
846 case 2:\r
847 if (*length != 2)\r
848 return MAP_UNMAPPABLE;\r
849 else\r
850 return coded;\r
851 default:\r
852 return MAP_UNMAPPABLE;\r
853 }\r
854}\r
855\r
856static DBCHAR\r
857jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)\r
858{\r
859 DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);\r
860 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
861 return coded;\r
862 else if (coded & 0x8000)\r
863 return coded & 0x7fff;\r
864 else\r
865 return MAP_UNMAPPABLE;\r
866}\r
867\r
868static DBCHAR\r
869jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)\r
870{\r
871 DBCHAR coded = jisx0213_encoder(data, length, NULL);\r
872 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
873 return coded;\r
874 else if (coded & 0x8000)\r
875 return MAP_UNMAPPABLE;\r
876 else\r
877 return coded;\r
878}\r
879\r
880static DBCHAR\r
881jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)\r
882{\r
883 DBCHAR coded;\r
884 Py_ssize_t ilength = *length;\r
885\r
886 coded = jisx0213_encoder(data, length, NULL);\r
887 switch (ilength) {\r
888 case 1:\r
889 if (coded == MAP_MULTIPLE_AVAIL)\r
890 return MAP_MULTIPLE_AVAIL;\r
891 else\r
892 return MAP_UNMAPPABLE;\r
893 case 2:\r
894 if (*length != 2)\r
895 return MAP_UNMAPPABLE;\r
896 else\r
897 return coded;\r
898 default:\r
899 return MAP_UNMAPPABLE;\r
900 }\r
901}\r
902\r
903static DBCHAR\r
904jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)\r
905{\r
906 DBCHAR coded = jisx0213_encoder(data, length, NULL);\r
907 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
908 return coded;\r
909 else if (coded & 0x8000)\r
910 return coded & 0x7fff;\r
911 else\r
912 return MAP_UNMAPPABLE;\r
913}\r
914\r
915static ucs4_t\r
916jisx0201_r_decoder(const unsigned char *data)\r
917{\r
918 ucs4_t u;\r
919 JISX0201_R_DECODE(*data, u)\r
920 else return MAP_UNMAPPABLE;\r
921 return u;\r
922}\r
923\r
924static DBCHAR\r
925jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)\r
926{\r
927 DBCHAR coded;\r
928 JISX0201_R_ENCODE(*data, coded)\r
929 else return MAP_UNMAPPABLE;\r
930 return coded;\r
931}\r
932\r
933static ucs4_t\r
934jisx0201_k_decoder(const unsigned char *data)\r
935{\r
936 ucs4_t u;\r
937 JISX0201_K_DECODE(*data ^ 0x80, u)\r
938 else return MAP_UNMAPPABLE;\r
939 return u;\r
940}\r
941\r
942static DBCHAR\r
943jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)\r
944{\r
945 DBCHAR coded;\r
946 JISX0201_K_ENCODE(*data, coded)\r
947 else return MAP_UNMAPPABLE;\r
948 return coded - 0x80;\r
949}\r
950\r
951static int\r
952gb2312_init(void)\r
953{\r
954 static int initialized = 0;\r
955\r
956 if (!initialized && (\r
957 IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||\r
958 IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))\r
959 return -1;\r
960 initialized = 1;\r
961 return 0;\r
962}\r
963\r
964static ucs4_t\r
965gb2312_decoder(const unsigned char *data)\r
966{\r
967 ucs4_t u;\r
968 TRYMAP_DEC(gb2312, u, data[0], data[1])\r
969 return u;\r
970 else\r
971 return MAP_UNMAPPABLE;\r
972}\r
973\r
974static DBCHAR\r
975gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)\r
976{\r
977 DBCHAR coded;\r
978 assert(*length == 1);\r
979 if (*data < 0x10000) {\r
980 TRYMAP_ENC(gbcommon, coded, *data) {\r
981 if (!(coded & 0x8000))\r
982 return coded;\r
983 }\r
984 }\r
985 return MAP_UNMAPPABLE;\r
986}\r
987\r
988\r
989static ucs4_t\r
990dummy_decoder(const unsigned char *data)\r
991{\r
992 return MAP_UNMAPPABLE;\r
993}\r
994\r
995static DBCHAR\r
996dummy_encoder(const ucs4_t *data, Py_ssize_t *length)\r
997{\r
998 return MAP_UNMAPPABLE;\r
999}\r
1000\r
1001/*-*- registry tables -*-*/\r
1002\r
/* Initializers for struct iso2022_designation, in member order:
 *   { mark, plane, width, initializer, decoder, encoder }
 * NOTE(review): the two CNS11643 entries name functions that are not
 * defined in the part of the file shown here, and no table below uses
 * them. */
#define REGISTRY_KSX1001_G0                                             \
    { CHARSET_KSX1001, 0, 2,                                            \
      ksx1001_init, ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_KSX1001_G1                                             \
    { CHARSET_KSX1001, 1, 2,                                            \
      ksx1001_init, ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_JISX0201_R                                             \
    { CHARSET_JISX0201_R, 0, 1,                                         \
      NULL, jisx0201_r_decoder, jisx0201_r_encoder }
#define REGISTRY_JISX0201_K                                             \
    { CHARSET_JISX0201_K, 0, 1,                                         \
      NULL, jisx0201_k_decoder, jisx0201_k_encoder }
#define REGISTRY_JISX0208                                               \
    { CHARSET_JISX0208, 0, 2,                                           \
      jisx0208_init, jisx0208_decoder, jisx0208_encoder }
#define REGISTRY_JISX0208_O                                             \
    { CHARSET_JISX0208_O, 0, 2,                                         \
      jisx0208_init, jisx0208_decoder, jisx0208_encoder }
#define REGISTRY_JISX0212                                               \
    { CHARSET_JISX0212, 0, 2,                                           \
      jisx0212_init, jisx0212_decoder, jisx0212_encoder }
#define REGISTRY_JISX0213_2000_1                                        \
    { CHARSET_JISX0213_2000_1, 0, 2,                                    \
      jisx0213_init, jisx0213_2000_1_decoder, jisx0213_2000_1_encoder }
#define REGISTRY_JISX0213_2000_1_PAIRONLY                               \
    { CHARSET_JISX0213_2000_1, 0, 2,                                    \
      jisx0213_init, jisx0213_2000_1_decoder,                           \
      jisx0213_2000_1_encoder_paironly }
#define REGISTRY_JISX0213_2000_2                                        \
    { CHARSET_JISX0213_2, 0, 2,                                         \
      jisx0213_init, jisx0213_2000_2_decoder, jisx0213_2000_2_encoder }
#define REGISTRY_JISX0213_2004_1                                        \
    { CHARSET_JISX0213_2004_1, 0, 2,                                    \
      jisx0213_init, jisx0213_2004_1_decoder, jisx0213_2004_1_encoder }
#define REGISTRY_JISX0213_2004_1_PAIRONLY                               \
    { CHARSET_JISX0213_2004_1, 0, 2,                                    \
      jisx0213_init, jisx0213_2004_1_decoder,                           \
      jisx0213_2004_1_encoder_paironly }
#define REGISTRY_JISX0213_2004_2                                        \
    { CHARSET_JISX0213_2, 0, 2,                                         \
      jisx0213_init, jisx0213_2004_2_decoder, jisx0213_2004_2_encoder }
#define REGISTRY_GB2312                                                 \
    { CHARSET_GB2312, 0, 2,                                             \
      gb2312_init, gb2312_decoder, gb2312_encoder }
#define REGISTRY_CNS11643_1                                             \
    { CHARSET_CNS11643_1, 1, 2,                                         \
      cns11643_init, cns11643_1_decoder, cns11643_1_encoder }
#define REGISTRY_CNS11643_2                                             \
    { CHARSET_CNS11643_2, 2, 2,                                         \
      cns11643_init, cns11643_2_decoder, cns11643_2_encoder }
#define REGISTRY_ISO8859_1                                              \
    { CHARSET_ISO8859_1, 2, 1,                                          \
      NULL, dummy_decoder, dummy_encoder }
#define REGISTRY_ISO8859_7                                              \
    { CHARSET_ISO8859_7, 2, 1,                                          \
      NULL, dummy_decoder, dummy_encoder }
/* Table terminator: an entry whose mark is 0. */
#define REGISTRY_SENTINEL       { 0, }
/* Define iso2022_<var>_config from the flag set `attrs` and the table
 * iso2022_<var>_designations, which must already be declared.  The
 * expansion ends in ';', so invocations are written without one. */
#define CONFIGDEF(var, attrs)                                           \
    static const struct iso2022_config iso2022_##var##_config = {       \
        attrs,                                                          \
        iso2022_##var##_designations                                    \
    };
1066\r
1067static const struct iso2022_designation iso2022_kr_designations[] = {\r
1068 REGISTRY_KSX1001_G1, REGISTRY_SENTINEL\r
1069};\r
1070CONFIGDEF(kr, 0)\r
1071\r
1072static const struct iso2022_designation iso2022_jp_designations[] = {\r
1073 REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,\r
1074 REGISTRY_SENTINEL\r
1075};\r
1076CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)\r
1077\r
1078static const struct iso2022_designation iso2022_jp_1_designations[] = {\r
1079 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,\r
1080 REGISTRY_JISX0208_O, REGISTRY_SENTINEL\r
1081};\r
1082CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)\r
1083\r
1084static const struct iso2022_designation iso2022_jp_2_designations[] = {\r
1085 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,\r
1086 REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,\r
1087 REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL\r
1088};\r
1089CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)\r
1090\r
1091static const struct iso2022_designation iso2022_jp_2004_designations[] = {\r
1092 REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,\r
1093 REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL\r
1094};\r
1095CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)\r
1096\r
1097static const struct iso2022_designation iso2022_jp_3_designations[] = {\r
1098 REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,\r
1099 REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL\r
1100};\r
1101CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)\r
1102\r
1103static const struct iso2022_designation iso2022_jp_ext_designations[] = {\r
1104 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,\r
1105 REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL\r
1106};\r
1107CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)\r
1108\r
1109\r
1110BEGIN_MAPPINGS_LIST\r
1111 /* no mapping table here */\r
1112END_MAPPINGS_LIST\r
1113\r
/* One codec-list entry for iso2022_<variation>: its name, its config
 * record, the shared initializer and the stateful method set.  The
 * expansion ends in ',', so invocations are simply listed one per line. */
#define ISO2022_CODEC(variation)                \
    {                                           \
        "iso2022_" #variation,                  \
        &iso2022_##variation##_config,          \
        iso2022_codec_init,                     \
        _STATEFUL_METHODS(iso2022)              \
    },
1120\r
1121BEGIN_CODECS_LIST\r
1122 ISO2022_CODEC(kr)\r
1123 ISO2022_CODEC(jp)\r
1124 ISO2022_CODEC(jp_1)\r
1125 ISO2022_CODEC(jp_2)\r
1126 ISO2022_CODEC(jp_2004)\r
1127 ISO2022_CODEC(jp_3)\r
1128 ISO2022_CODEC(jp_ext)\r
1129END_CODECS_LIST\r
1130\r
1131I_AM_A_MODULE_FOR(iso2022)\r