]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | /* |
2 | * Convert an 8-bit input string (e.g. ISO-8859-1) into CESU-8. | |
3 | * Calling code supplies the "code page" as a 256-entry array of | |
4 | * codepoints for the conversion. | |
5 | * | |
6 | * This is useful when input data is in non-UTF-8 format and must | |
7 | * be converted at runtime, e.g. when compiling non-UTF-8 source | |
8 | * code. Another alternative is to use e.g. iconv. | |
9 | */ | |
10 | ||
11 | #include "duktape.h" | |
12 | ||
13 | /* Decode an 8-bit string using 'codepage' into Unicode codepoints and | |
14 | * re-encode into CESU-8. Codepage argument must point to a 256-entry | |
15 | * table. Only supports BMP (codepoints U+0000 to U+FFFF). | |
16 | */ | |
17 | void duk_decode_string_codepage(duk_context *ctx, const char *str, size_t len, unsigned int *codepage) { | |
18 | unsigned char *tmp; | |
19 | size_t tmplen, i; | |
20 | unsigned char *p; | |
21 | unsigned int cp; | |
22 | ||
23 | tmplen = 3 * len; /* max expansion is 1 input byte -> 3 output bytes */ | |
24 | if (tmplen / 3 != len) { | |
25 | /* Temporary buffer length wraps. */ | |
26 | duk_error(ctx, DUK_ERR_RANGE_ERROR, "input string too long"); | |
27 | return; | |
28 | } | |
29 | ||
30 | tmp = (unsigned char *) duk_push_fixed_buffer(ctx, tmplen); | |
31 | ||
32 | for (i = 0, p = tmp; i < len; i++) { | |
33 | cp = codepage[((unsigned char *) str)[i]] & 0xffffUL; | |
34 | if (cp < 0x80UL) { | |
35 | *p++ = (unsigned char) cp; | |
36 | } else if (cp < 0x800UL) { | |
37 | *p++ = (unsigned char) (0xc0 + ((cp >> 6) & 0x1f)); | |
38 | *p++ = (unsigned char) (0x80 + (cp & 0x3f)); | |
39 | } else { | |
40 | /* In CESU-8 all codepoints in [0x0000,0xFFFF] are | |
41 | * allowed, including surrogates. | |
42 | */ | |
43 | *p++ = (unsigned char) (0xe0 + ((cp >> 12) & 0x0f)); | |
44 | *p++ = (unsigned char) (0x80 + ((cp >> 6) & 0x3f)); | |
45 | *p++ = (unsigned char) (0x80 + (cp & 0x3f)); | |
46 | } | |
47 | } | |
48 | ||
49 | duk_push_lstring(ctx, (const char *) tmp, (duk_size_t) (p - tmp)); | |
50 | ||
51 | /* [ ... tmp res ] */ | |
52 | ||
53 | duk_remove(ctx, -2); | |
54 | } |