]>
git.proxmox.com Git - ceph.git/blob - ceph/src/civetweb/src/third_party/duktape-1.8.0/examples/codepage-conv/duk_codepage_conv.c
/*
 *  Convert an 8-bit input string (e.g. ISO-8859-1) into CESU-8.
 *  Calling code supplies the "code page" as a 256-entry array of
 *  codepoints for the conversion.
 *
 *  This is useful when input data is in non-UTF-8 format and must
 *  be converted at runtime, e.g. when compiling non-UTF-8 source
 *  code.  Another alternative is to use e.g. iconv.
 */
/* Decode an 8-bit string using 'codepage' into Unicode codepoints and
 * re-encode into CESU-8.  Codepage argument must point to a 256-entry
 * table.  Only supports BMP (codepoints U+0000 to U+FFFF).
 */
17 void duk_decode_string_codepage(duk_context
*ctx
, const char *str
, size_t len
, unsigned int *codepage
) {
23 tmplen
= 3 * len
; /* max expansion is 1 input byte -> 3 output bytes */
24 if (tmplen
/ 3 != len
) {
25 /* Temporary buffer length wraps. */
26 duk_error(ctx
, DUK_ERR_RANGE_ERROR
, "input string too long");
30 tmp
= (unsigned char *) duk_push_fixed_buffer(ctx
, tmplen
);
32 for (i
= 0, p
= tmp
; i
< len
; i
++) {
33 cp
= codepage
[((unsigned char *) str
)[i
]] & 0xffffUL
;
35 *p
++ = (unsigned char) cp
;
36 } else if (cp
< 0x800UL
) {
37 *p
++ = (unsigned char) (0xc0 + ((cp
>> 6) & 0x1f));
38 *p
++ = (unsigned char) (0x80 + (cp
& 0x3f));
40 /* In CESU-8 all codepoints in [0x0000,0xFFFF] are
41 * allowed, including surrogates.
43 *p
++ = (unsigned char) (0xe0 + ((cp
>> 12) & 0x0f));
44 *p
++ = (unsigned char) (0x80 + ((cp
>> 6) & 0x3f));
45 *p
++ = (unsigned char) (0x80 + (cp
& 0x3f));
49 duk_push_lstring(ctx
, (const char *) tmp
, (duk_size_t
) (p
- tmp
));