]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Modules/cjkcodecs/_codecs_cn.c
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 2/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Modules / cjkcodecs / _codecs_cn.c
CommitLineData
7eb75bcc
DM
1/*\r
2 * _codecs_cn.c: Codecs collection for Mainland Chinese encodings\r
3 *\r
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>\r
5 */\r
6\r
7#include "cjkcodecs.h"\r
8#include "mappings_cn.h"\r
9\r
/**
 * hz is predefined as 100 on AIX, so we undefine it to avoid a
 * conflict with the hz codec's own identifiers.
 */
14#ifdef _AIX\r
15#undef hz\r
16#endif\r
17\r
/* GBK and GB2312 map a few code points differently, as listed below:
 *
 *              gb2312                          gbk
 * A1A4         U+30FB KATAKANA MIDDLE DOT      U+00B7 MIDDLE DOT
 * A1AA         U+2015 HORIZONTAL BAR           U+2014 EM DASH
 * A844         undefined                       U+2015 HORIZONTAL BAR
 */
25\r
/*
 * GBK_DECODE(dc1, dc2, assi): decode the two-byte sequence dc1 dc2 and
 * store the result in assi.
 *
 * The three byte pairs A1AA, A844 and A1A4 are answered directly.  Any
 * other pair is looked up in the gb2312 table with bit 7 of both bytes
 * toggled (^ 0x80), and then in the gbkext table with the bytes as given.
 *
 * The expansion is an open if / else-if chain whose final arm is a
 * TRYMAP_DEC with an empty body, so the caller continues it with
 * "else ...".  dc1 and dc2 are expanded several times and therefore must
 * not have side effects; they are parenthesized here so that compound
 * argument expressions keep their intended precedence next to "^".
 */
#define GBK_DECODE(dc1, dc2, assi) \
    if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
    else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
    else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
    else TRYMAP_DEC(gb2312, assi, (dc1) ^ 0x80, (dc2) ^ 0x80); \
    else TRYMAP_DEC(gbkext, assi, (dc1), (dc2));
32\r
/*
 * GBK_ENCODE(code, assi): find the GBK double-byte value for the Unicode
 * code point `code` and store it in assi.
 *
 * U+2014, U+2015 and U+00B7 are answered directly.  U+30FB is excluded
 * from the table lookup; every other code point is looked up in the
 * gbcommon table.
 *
 * The expansion is an open if / else-if chain ending in an if with an
 * empty body, so the caller continues it with "else ...".  `code` is
 * expanded several times and must not have side effects.
 */
#define GBK_ENCODE(code, assi)                                          \
    if ((code) == 0x2014)                                               \
        (assi) = 0xa1aa;                                                \
    else if ((code) == 0x2015)                                          \
        (assi) = 0xa844;                                                \
    else if ((code) == 0x00b7)                                          \
        (assi) = 0xa1a4;                                                \
    else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
38\r
39/*\r
40 * GB2312 codec\r
41 */\r
42\r
43ENCODER(gb2312)\r
44{\r
45 while (inleft > 0) {\r
46 Py_UNICODE c = IN1;\r
47 DBCHAR code;\r
48\r
49 if (c < 0x80) {\r
50 WRITE1((unsigned char)c)\r
51 NEXT(1, 1)\r
52 continue;\r
53 }\r
54 UCS4INVALID(c)\r
55\r
56 REQUIRE_OUTBUF(2)\r
57 TRYMAP_ENC(gbcommon, code, c);\r
58 else return 1;\r
59\r
60 if (code & 0x8000) /* MSB set: GBK */\r
61 return 1;\r
62\r
63 OUT1((code >> 8) | 0x80)\r
64 OUT2((code & 0xFF) | 0x80)\r
65 NEXT(1, 2)\r
66 }\r
67\r
68 return 0;\r
69}\r
70\r
71DECODER(gb2312)\r
72{\r
73 while (inleft > 0) {\r
74 unsigned char c = **inbuf;\r
75\r
76 REQUIRE_OUTBUF(1)\r
77\r
78 if (c < 0x80) {\r
79 OUT1(c)\r
80 NEXT(1, 1)\r
81 continue;\r
82 }\r
83\r
84 REQUIRE_INBUF(2)\r
85 TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {\r
86 NEXT(2, 1)\r
87 }\r
88 else return 2;\r
89 }\r
90\r
91 return 0;\r
92}\r
93\r
94\r
95/*\r
96 * GBK codec\r
97 */\r
98\r
99ENCODER(gbk)\r
100{\r
101 while (inleft > 0) {\r
102 Py_UNICODE c = IN1;\r
103 DBCHAR code;\r
104\r
105 if (c < 0x80) {\r
106 WRITE1((unsigned char)c)\r
107 NEXT(1, 1)\r
108 continue;\r
109 }\r
110 UCS4INVALID(c)\r
111\r
112 REQUIRE_OUTBUF(2)\r
113\r
114 GBK_ENCODE(c, code)\r
115 else return 1;\r
116\r
117 OUT1((code >> 8) | 0x80)\r
118 if (code & 0x8000)\r
119 OUT2((code & 0xFF)) /* MSB set: GBK */\r
120 else\r
121 OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */\r
122 NEXT(1, 2)\r
123 }\r
124\r
125 return 0;\r
126}\r
127\r
128DECODER(gbk)\r
129{\r
130 while (inleft > 0) {\r
131 unsigned char c = IN1;\r
132\r
133 REQUIRE_OUTBUF(1)\r
134\r
135 if (c < 0x80) {\r
136 OUT1(c)\r
137 NEXT(1, 1)\r
138 continue;\r
139 }\r
140\r
141 REQUIRE_INBUF(2)\r
142\r
143 GBK_DECODE(c, IN2, **outbuf)\r
144 else return 2;\r
145\r
146 NEXT(2, 1)\r
147 }\r
148\r
149 return 0;\r
150}\r
151\r
152\r
153/*\r
154 * GB18030 codec\r
155 */\r
156\r
157ENCODER(gb18030)\r
158{\r
159 while (inleft > 0) {\r
160 ucs4_t c = IN1;\r
161 DBCHAR code;\r
162\r
163 if (c < 0x80) {\r
164 WRITE1(c)\r
165 NEXT(1, 1)\r
166 continue;\r
167 }\r
168\r
169 DECODE_SURROGATE(c)\r
170 if (c > 0x10FFFF)\r
171#if Py_UNICODE_SIZE == 2\r
172 return 2; /* surrogates pair */\r
173#else\r
174 return 1;\r
175#endif\r
176 else if (c >= 0x10000) {\r
177 ucs4_t tc = c - 0x10000;\r
178\r
179 REQUIRE_OUTBUF(4)\r
180\r
181 OUT4((unsigned char)(tc % 10) + 0x30)\r
182 tc /= 10;\r
183 OUT3((unsigned char)(tc % 126) + 0x81)\r
184 tc /= 126;\r
185 OUT2((unsigned char)(tc % 10) + 0x30)\r
186 tc /= 10;\r
187 OUT1((unsigned char)(tc + 0x90))\r
188\r
189#if Py_UNICODE_SIZE == 2\r
190 NEXT(2, 4) /* surrogates pair */\r
191#else\r
192 NEXT(1, 4)\r
193#endif\r
194 continue;\r
195 }\r
196\r
197 REQUIRE_OUTBUF(2)\r
198\r
199 GBK_ENCODE(c, code)\r
200 else TRYMAP_ENC(gb18030ext, code, c);\r
201 else {\r
202 const struct _gb18030_to_unibmp_ranges *utrrange;\r
203\r
204 REQUIRE_OUTBUF(4)\r
205\r
206 for (utrrange = gb18030_to_unibmp_ranges;\r
207 utrrange->first != 0;\r
208 utrrange++)\r
209 if (utrrange->first <= c &&\r
210 c <= utrrange->last) {\r
211 Py_UNICODE tc;\r
212\r
213 tc = c - utrrange->first +\r
214 utrrange->base;\r
215\r
216 OUT4((unsigned char)(tc % 10) + 0x30)\r
217 tc /= 10;\r
218 OUT3((unsigned char)(tc % 126) + 0x81)\r
219 tc /= 126;\r
220 OUT2((unsigned char)(tc % 10) + 0x30)\r
221 tc /= 10;\r
222 OUT1((unsigned char)tc + 0x81)\r
223\r
224 NEXT(1, 4)\r
225 break;\r
226 }\r
227\r
228 if (utrrange->first == 0)\r
229 return 1;\r
230 continue;\r
231 }\r
232\r
233 OUT1((code >> 8) | 0x80)\r
234 if (code & 0x8000)\r
235 OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */\r
236 else\r
237 OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */\r
238\r
239 NEXT(1, 2)\r
240 }\r
241\r
242 return 0;\r
243}\r
244\r
245DECODER(gb18030)\r
246{\r
247 while (inleft > 0) {\r
248 unsigned char c = IN1, c2;\r
249\r
250 REQUIRE_OUTBUF(1)\r
251\r
252 if (c < 0x80) {\r
253 OUT1(c)\r
254 NEXT(1, 1)\r
255 continue;\r
256 }\r
257\r
258 REQUIRE_INBUF(2)\r
259\r
260 c2 = IN2;\r
261 if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */\r
262 const struct _gb18030_to_unibmp_ranges *utr;\r
263 unsigned char c3, c4;\r
264 ucs4_t lseq;\r
265\r
266 REQUIRE_INBUF(4)\r
267 c3 = IN3;\r
268 c4 = IN4;\r
269 if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)\r
270 return 4;\r
271 c -= 0x81; c2 -= 0x30;\r
272 c3 -= 0x81; c4 -= 0x30;\r
273\r
274 if (c < 4) { /* U+0080 - U+FFFF */\r
275 lseq = ((ucs4_t)c * 10 + c2) * 1260 +\r
276 (ucs4_t)c3 * 10 + c4;\r
277 if (lseq < 39420) {\r
278 for (utr = gb18030_to_unibmp_ranges;\r
279 lseq >= (utr + 1)->base;\r
280 utr++) ;\r
281 OUT1(utr->first - utr->base + lseq)\r
282 NEXT(4, 1)\r
283 continue;\r
284 }\r
285 }\r
286 else if (c >= 15) { /* U+10000 - U+10FFFF */\r
287 lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)\r
288 * 1260 + (ucs4_t)c3 * 10 + c4;\r
289 if (lseq <= 0x10FFFF) {\r
290 WRITEUCS4(lseq);\r
291 NEXT_IN(4)\r
292 continue;\r
293 }\r
294 }\r
295 return 4;\r
296 }\r
297\r
298 GBK_DECODE(c, c2, **outbuf)\r
299 else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);\r
300 else return 2;\r
301\r
302 NEXT(2, 1)\r
303 }\r
304\r
305 return 0;\r
306}\r
307\r
308\r
309/*\r
310 * HZ codec\r
311 */\r
312\r
313ENCODER_INIT(hz)\r
314{\r
315 state->i = 0;\r
316 return 0;\r
317}\r
318\r
319ENCODER_RESET(hz)\r
320{\r
321 if (state->i != 0) {\r
322 WRITE2('~', '}')\r
323 state->i = 0;\r
324 NEXT_OUT(2)\r
325 }\r
326 return 0;\r
327}\r
328\r
329ENCODER(hz)\r
330{\r
331 while (inleft > 0) {\r
332 Py_UNICODE c = IN1;\r
333 DBCHAR code;\r
334\r
335 if (c < 0x80) {\r
336 if (state->i == 0) {\r
337 WRITE1((unsigned char)c)\r
338 NEXT(1, 1)\r
339 }\r
340 else {\r
341 WRITE3('~', '}', (unsigned char)c)\r
342 NEXT(1, 3)\r
343 state->i = 0;\r
344 }\r
345 continue;\r
346 }\r
347\r
348 UCS4INVALID(c)\r
349\r
350 TRYMAP_ENC(gbcommon, code, c);\r
351 else return 1;\r
352\r
353 if (code & 0x8000) /* MSB set: GBK */\r
354 return 1;\r
355\r
356 if (state->i == 0) {\r
357 WRITE4('~', '{', code >> 8, code & 0xff)\r
358 NEXT(1, 4)\r
359 state->i = 1;\r
360 }\r
361 else {\r
362 WRITE2(code >> 8, code & 0xff)\r
363 NEXT(1, 2)\r
364 }\r
365 }\r
366\r
367 return 0;\r
368}\r
369\r
370DECODER_INIT(hz)\r
371{\r
372 state->i = 0;\r
373 return 0;\r
374}\r
375\r
376DECODER_RESET(hz)\r
377{\r
378 state->i = 0;\r
379 return 0;\r
380}\r
381\r
382DECODER(hz)\r
383{\r
384 while (inleft > 0) {\r
385 unsigned char c = IN1;\r
386\r
387 if (c == '~') {\r
388 unsigned char c2 = IN2;\r
389\r
390 REQUIRE_INBUF(2)\r
391 if (c2 == '~') {\r
392 WRITE1('~')\r
393 NEXT(2, 1)\r
394 continue;\r
395 }\r
396 else if (c2 == '{' && state->i == 0)\r
397 state->i = 1; /* set GB */\r
398 else if (c2 == '}' && state->i == 1)\r
399 state->i = 0; /* set ASCII */\r
400 else if (c2 == '\n')\r
401 ; /* line-continuation */\r
402 else\r
403 return 2;\r
404 NEXT(2, 0);\r
405 continue;\r
406 }\r
407\r
408 if (c & 0x80)\r
409 return 1;\r
410\r
411 if (state->i == 0) { /* ASCII mode */\r
412 WRITE1(c)\r
413 NEXT(1, 1)\r
414 }\r
415 else { /* GB mode */\r
416 REQUIRE_INBUF(2)\r
417 REQUIRE_OUTBUF(1)\r
418 TRYMAP_DEC(gb2312, **outbuf, c, IN2) {\r
419 NEXT(2, 1)\r
420 }\r
421 else\r
422 return 2;\r
423 }\r
424 }\r
425\r
426 return 0;\r
427}\r
428\r
429\r
430BEGIN_MAPPINGS_LIST\r
431 MAPPING_DECONLY(gb2312)\r
432 MAPPING_DECONLY(gbkext)\r
433 MAPPING_ENCONLY(gbcommon)\r
434 MAPPING_ENCDEC(gb18030ext)\r
435END_MAPPINGS_LIST\r
436\r
437BEGIN_CODECS_LIST\r
438 CODEC_STATELESS(gb2312)\r
439 CODEC_STATELESS(gbk)\r
440 CODEC_STATELESS(gb18030)\r
441 CODEC_STATEFUL(hz)\r
442END_CODECS_LIST\r
443\r
444I_AM_A_MODULE_FOR(cn)\r