]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Modules/cjkcodecs/_codecs_kr.c
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 2/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Modules / cjkcodecs / _codecs_kr.c
CommitLineData
7eb75bcc
DM
/*
 * _codecs_kr.c: Codecs collection for Korean encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6\r
7#include "cjkcodecs.h"\r
8#include "mappings_kr.h"\r
9\r
/*
 * EUC-KR codec
 */
13\r
/*
 * Bytes used by the 8-byte EUC-KR "make-up sequence" for Hangul syllables.
 * Every 2-byte unit of that sequence starts with EUCKR_JAMO_FIRSTBYTE; the
 * pair (FIRSTBYTE, FILLER) opens the sequence and FILLER is also the second
 * byte written when a syllable has no final consonant.
 */
#define EUCKR_JAMO_FIRSTBYTE    0xA4
#define EUCKR_JAMO_FILLER       0xD4

/*
 * Second byte of the jamo unit for each initial consonant.
 * Indexed by (syllable - 0xac00) / 588.
 */
static const unsigned char u2cgk_choseong[19] = {
    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
    0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
    0xbc, 0xbd, 0xbe
};

/*
 * Second byte of the jamo unit for each medial vowel.
 * Indexed by ((syllable - 0xac00) / 28) % 21.
 */
static const unsigned char u2cgk_jungseong[21] = {
    0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
    0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
    0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};

/*
 * Second byte of the jamo unit for each final consonant.
 * Indexed by (syllable - 0xac00) % 28; entry 0 is the filler byte 0xd4.
 */
static const unsigned char u2cgk_jongseong[28] = {
    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
    0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
    0xbb, 0xbc, 0xbd, 0xbe
};
33\r
34ENCODER(euc_kr)\r
35{\r
36 while (inleft > 0) {\r
37 Py_UNICODE c = IN1;\r
38 DBCHAR code;\r
39\r
40 if (c < 0x80) {\r
41 WRITE1((unsigned char)c)\r
42 NEXT(1, 1)\r
43 continue;\r
44 }\r
45 UCS4INVALID(c)\r
46\r
47 REQUIRE_OUTBUF(2)\r
48 TRYMAP_ENC(cp949, code, c);\r
49 else return 1;\r
50\r
51 if ((code & 0x8000) == 0) {\r
52 /* KS X 1001 coded character */\r
53 OUT1((code >> 8) | 0x80)\r
54 OUT2((code & 0xFF) | 0x80)\r
55 NEXT(1, 2)\r
56 }\r
57 else { /* Mapping is found in CP949 extension,\r
58 * but we encode it in KS X 1001:1998 Annex 3,\r
59 * make-up sequence for EUC-KR. */\r
60\r
61 REQUIRE_OUTBUF(8)\r
62\r
63 /* syllable composition precedence */\r
64 OUT1(EUCKR_JAMO_FIRSTBYTE)\r
65 OUT2(EUCKR_JAMO_FILLER)\r
66\r
67 /* All code points in CP949 extension are in unicode\r
68 * Hangul Syllable area. */\r
69 assert(0xac00 <= c && c <= 0xd7a3);\r
70 c -= 0xac00;\r
71\r
72 OUT3(EUCKR_JAMO_FIRSTBYTE)\r
73 OUT4(u2cgk_choseong[c / 588])\r
74 NEXT_OUT(4)\r
75\r
76 OUT1(EUCKR_JAMO_FIRSTBYTE)\r
77 OUT2(u2cgk_jungseong[(c / 28) % 21])\r
78 OUT3(EUCKR_JAMO_FIRSTBYTE)\r
79 OUT4(u2cgk_jongseong[c % 28])\r
80 NEXT(1, 4)\r
81 }\r
82 }\r
83\r
84 return 0;\r
85}\r
86\r
/* Marks a byte in [0xA1, 0xBE] that is not valid for the given position. */
#define NONE    127

/*
 * Initial-consonant index for the second byte of a jamo unit.
 * Indexed by (byte - 0xA1); NONE where the byte is not an initial consonant.
 */
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
       0,    1, NONE,    2, NONE, NONE,    3,    4,
       5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
       6,    7,    8, NONE,    9,   10,   11,   12,
      13,   14,   15,   16,   17,   18
};

/*
 * Final-consonant index (1..27) for the second byte of a jamo unit.
 * Indexed by (byte - 0xA1); NONE where the byte is not a final consonant.
 * Index 0 (no final) is not in this table: the decoder handles the filler
 * byte separately.
 */
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
       1,    2,    3,    4,    5,    6,    7, NONE,
       8,    9,   10,   11,   12,   13,   14,   15,
      16,   17, NONE,   18,   19,   20,   21,   22,
    NONE,   23,   24,   25,   26,   27
};
101\r
102DECODER(euc_kr)\r
103{\r
104 while (inleft > 0) {\r
105 unsigned char c = IN1;\r
106\r
107 REQUIRE_OUTBUF(1)\r
108\r
109 if (c < 0x80) {\r
110 OUT1(c)\r
111 NEXT(1, 1)\r
112 continue;\r
113 }\r
114\r
115 REQUIRE_INBUF(2)\r
116\r
117 if (c == EUCKR_JAMO_FIRSTBYTE &&\r
118 IN2 == EUCKR_JAMO_FILLER) {\r
119 /* KS X 1001:1998 Annex 3 make-up sequence */\r
120 DBCHAR cho, jung, jong;\r
121\r
122 REQUIRE_INBUF(8)\r
123 if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||\r
124 (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||\r
125 (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)\r
126 return 8;\r
127\r
128 c = (*inbuf)[3];\r
129 if (0xa1 <= c && c <= 0xbe)\r
130 cho = cgk2u_choseong[c - 0xa1];\r
131 else\r
132 cho = NONE;\r
133\r
134 c = (*inbuf)[5];\r
135 jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;\r
136\r
137 c = (*inbuf)[7];\r
138 if (c == EUCKR_JAMO_FILLER)\r
139 jong = 0;\r
140 else if (0xa1 <= c && c <= 0xbe)\r
141 jong = cgk2u_jongseong[c - 0xa1];\r
142 else\r
143 jong = NONE;\r
144\r
145 if (cho == NONE || jung == NONE || jong == NONE)\r
146 return 8;\r
147\r
148 OUT1(0xac00 + cho*588 + jung*28 + jong);\r
149 NEXT(8, 1)\r
150 }\r
151 else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {\r
152 NEXT(2, 1)\r
153 }\r
154 else\r
155 return 2;\r
156 }\r
157\r
158 return 0;\r
159}\r
160#undef NONE\r
161\r
162\r
/*
 * CP949 codec
 */
166\r
167ENCODER(cp949)\r
168{\r
169 while (inleft > 0) {\r
170 Py_UNICODE c = IN1;\r
171 DBCHAR code;\r
172\r
173 if (c < 0x80) {\r
174 WRITE1((unsigned char)c)\r
175 NEXT(1, 1)\r
176 continue;\r
177 }\r
178 UCS4INVALID(c)\r
179\r
180 REQUIRE_OUTBUF(2)\r
181 TRYMAP_ENC(cp949, code, c);\r
182 else return 1;\r
183\r
184 OUT1((code >> 8) | 0x80)\r
185 if (code & 0x8000)\r
186 OUT2(code & 0xFF) /* MSB set: CP949 */\r
187 else\r
188 OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */\r
189 NEXT(1, 2)\r
190 }\r
191\r
192 return 0;\r
193}\r
194\r
195DECODER(cp949)\r
196{\r
197 while (inleft > 0) {\r
198 unsigned char c = IN1;\r
199\r
200 REQUIRE_OUTBUF(1)\r
201\r
202 if (c < 0x80) {\r
203 OUT1(c)\r
204 NEXT(1, 1)\r
205 continue;\r
206 }\r
207\r
208 REQUIRE_INBUF(2)\r
209 TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);\r
210 else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);\r
211 else return 2;\r
212\r
213 NEXT(2, 1)\r
214 }\r
215\r
216 return 0;\r
217}\r
218\r
219\r
/*
 * JOHAB codec
 */
223\r
/*
 * 5-bit JOHAB field values used by the JOHAB encoder for Hangul syllables.
 * Each table is indexed by the component index derived from
 * (syllable - 0xac00).  The arrays are declared with 32 elements; only the
 * leading 19 / 21 / 28 are initialized explicitly and the rest are zero.
 */

/* Initial consonant field, indexed by offset / 588. */
static const unsigned char u2johabidx_choseong[32] = {
                0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14,
};

/* Medial vowel field, indexed by (offset / 28) % 21. */
static const unsigned char u2johabidx_jungseong[32] = {
                      0x03, 0x04, 0x05, 0x06, 0x07,
                0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
                0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                0x1a, 0x1b, 0x1c, 0x1d,
};

/* Final consonant field, indexed by offset % 28 (value 0x12 is skipped). */
static const unsigned char u2johabidx_jongseong[32] = {
          0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
241static const DBCHAR u2johabjamo[] = {\r
242 0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,\r
243 0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,\r
244 0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,\r
245 0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,\r
246 0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,\r
247 0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,\r
248 0x8741, 0x8761, 0x8781, 0x87a1,\r
249};\r
250\r
251ENCODER(johab)\r
252{\r
253 while (inleft > 0) {\r
254 Py_UNICODE c = IN1;\r
255 DBCHAR code;\r
256\r
257 if (c < 0x80) {\r
258 WRITE1((unsigned char)c)\r
259 NEXT(1, 1)\r
260 continue;\r
261 }\r
262 UCS4INVALID(c)\r
263\r
264 REQUIRE_OUTBUF(2)\r
265\r
266 if (c >= 0xac00 && c <= 0xd7a3) {\r
267 c -= 0xac00;\r
268 code = 0x8000 |\r
269 (u2johabidx_choseong[c / 588] << 10) |\r
270 (u2johabidx_jungseong[(c / 28) % 21] << 5) |\r
271 u2johabidx_jongseong[c % 28];\r
272 }\r
273 else if (c >= 0x3131 && c <= 0x3163)\r
274 code = u2johabjamo[c - 0x3131];\r
275 else TRYMAP_ENC(cp949, code, c) {\r
276 unsigned char c1, c2, t2;\r
277 unsigned short t1;\r
278\r
279 assert((code & 0x8000) == 0);\r
280 c1 = code >> 8;\r
281 c2 = code & 0xff;\r
282 if (((c1 >= 0x21 && c1 <= 0x2c) ||\r
283 (c1 >= 0x4a && c1 <= 0x7d)) &&\r
284 (c2 >= 0x21 && c2 <= 0x7e)) {\r
285 t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :\r
286 (c1 - 0x21 + 0x197));\r
287 t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);\r
288 OUT1(t1 >> 1)\r
289 OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)\r
290 NEXT(1, 2)\r
291 continue;\r
292 }\r
293 else\r
294 return 1;\r
295 }\r
296 else\r
297 return 1;\r
298\r
299 OUT1(code >> 8)\r
300 OUT2(code & 0xff)\r
301 NEXT(1, 2)\r
302 }\r
303\r
304 return 0;\r
305}\r
306\r
/*
 * Markers stored in the JOHAB decode tables below:
 *   FILL - the 5-bit field is the "fill" value (component absent);
 *   NONE - the 5-bit field value is invalid for this position.
 */
#define FILL 0xfd
#define NONE 0xff

/*
 * 5-bit JOHAB field -> component index used to compose a Hangul syllable
 * (0xac00 + cho*588 + jung*28 + jong).  Each table has one entry per
 * possible 5-bit value.
 */
static const unsigned char johabidx_choseong[32] = {
    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/*
 * 5-bit JOHAB field -> low byte of the standalone jamo character; the
 * decoder ORs the value with 0x3100 to form the output code point.
 */
static const unsigned char johabjamo_choseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
347\r
348DECODER(johab)\r
349{\r
350 while (inleft > 0) {\r
351 unsigned char c = IN1, c2;\r
352\r
353 REQUIRE_OUTBUF(1)\r
354\r
355 if (c < 0x80) {\r
356 OUT1(c)\r
357 NEXT(1, 1)\r
358 continue;\r
359 }\r
360\r
361 REQUIRE_INBUF(2)\r
362 c2 = IN2;\r
363\r
364 if (c < 0xd8) {\r
365 /* johab hangul */\r
366 unsigned char c_cho, c_jung, c_jong;\r
367 unsigned char i_cho, i_jung, i_jong;\r
368\r
369 c_cho = (c >> 2) & 0x1f;\r
370 c_jung = ((c << 3) | c2 >> 5) & 0x1f;\r
371 c_jong = c2 & 0x1f;\r
372\r
373 i_cho = johabidx_choseong[c_cho];\r
374 i_jung = johabidx_jungseong[c_jung];\r
375 i_jong = johabidx_jongseong[c_jong];\r
376\r
377 if (i_cho == NONE || i_jung == NONE || i_jong == NONE)\r
378 return 2;\r
379\r
380 /* we don't use U+1100 hangul jamo yet. */\r
381 if (i_cho == FILL) {\r
382 if (i_jung == FILL) {\r
383 if (i_jong == FILL)\r
384 OUT1(0x3000)\r
385 else\r
386 OUT1(0x3100 |\r
387 johabjamo_jongseong[c_jong])\r
388 }\r
389 else {\r
390 if (i_jong == FILL)\r
391 OUT1(0x3100 |\r
392 johabjamo_jungseong[c_jung])\r
393 else\r
394 return 2;\r
395 }\r
396 } else {\r
397 if (i_jung == FILL) {\r
398 if (i_jong == FILL)\r
399 OUT1(0x3100 |\r
400 johabjamo_choseong[c_cho])\r
401 else\r
402 return 2;\r
403 }\r
404 else\r
405 OUT1(0xac00 +\r
406 i_cho * 588 +\r
407 i_jung * 28 +\r
408 (i_jong == FILL ? 0 : i_jong))\r
409 }\r
410 NEXT(2, 1)\r
411 } else {\r
412 /* KS X 1001 except hangul jamos and syllables */\r
413 if (c == 0xdf || c > 0xf9 ||\r
414 c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||\r
415 (c2 & 0x7f) == 0x7f ||\r
416 (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))\r
417 return 2;\r
418 else {\r
419 unsigned char t1, t2;\r
420\r
421 t1 = (c < 0xe0 ? 2 * (c - 0xd9) :\r
422 2 * c - 0x197);\r
423 t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);\r
424 t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;\r
425 t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;\r
426\r
427 TRYMAP_DEC(ksx1001, **outbuf, t1, t2);\r
428 else return 2;\r
429 NEXT(2, 1)\r
430 }\r
431 }\r
432 }\r
433\r
434 return 0;\r
435}\r
436#undef NONE\r
437#undef FILL\r
438\r
439\r
440BEGIN_MAPPINGS_LIST\r
441 MAPPING_DECONLY(ksx1001)\r
442 MAPPING_ENCONLY(cp949)\r
443 MAPPING_DECONLY(cp949ext)\r
444END_MAPPINGS_LIST\r
445\r
446BEGIN_CODECS_LIST\r
447 CODEC_STATELESS(euc_kr)\r
448 CODEC_STATELESS(cp949)\r
449 CODEC_STATELESS(johab)\r
450END_CODECS_LIST\r
451\r
452I_AM_A_MODULE_FOR(kr)\r