ceph/src/civetweb/src/third_party/duktape-1.8.0/examples/codepage-conv/duk_codepage_conv.c

   1 /*
   2  *  Convert an 8-bit input string (e.g. ISO-8859-1) into CESU-8.
   3  *  Calling code supplies the "code page" as a 256-entry array of
   4  *  codepoints for the conversion.
   5  *
   6  *  This is useful when input data is in non-UTF-8 format and must
   7  *  be converted at runtime, e.g. when compiling non-UTF-8 source
   8  *  code.  Another alternative is to use e.g. iconv.
   9  */
  10
  11 #include "duktape.h"
  12
  13 /* Decode an 8-bit string using 'codepage' into Unicode codepoints and
  14  * re-encode into CESU-8.  Codepage argument must point to a 256-entry
  15  * table.  Only supports BMP (codepoints U+0000 to U+FFFF).
  16  */
  17 void duk_decode_string_codepage(duk_context *ctx, const char *str, size_t len, unsigned int *codepage) {
  18         unsigned char *tmp;
  19         size_t tmplen, i;
  20         unsigned char *p;
  21         unsigned int cp;
  22
  23         tmplen = 3 * len;  /* max expansion is 1 input byte -> 3 output bytes */
  24         if (tmplen / 3 != len) {
  25                 /* Temporary buffer length wraps. */
  26                 duk_error(ctx, DUK_ERR_RANGE_ERROR, "input string too long");
  27                 return;
  28         }
  29
  30         tmp = (unsigned char *) duk_push_fixed_buffer(ctx, tmplen);
  31
  32         for (i = 0, p = tmp; i < len; i++) {
  33                 cp = codepage[((unsigned char *) str)[i]] & 0xffffUL;
  34                 if (cp < 0x80UL) {
  35                         *p++ = (unsigned char) cp;
  36                 } else if (cp < 0x800UL) {
  37                         *p++ = (unsigned char) (0xc0 + ((cp >> 6) & 0x1f));
  38                         *p++ = (unsigned char) (0x80 + (cp & 0x3f));
  39                 } else {
  40                         /* In CESU-8 all codepoints in [0x0000,0xFFFF] are
  41                          * allowed, including surrogates.
  42                          */
  43                         *p++ = (unsigned char) (0xe0 + ((cp >> 12) & 0x0f));
  44                         *p++ = (unsigned char) (0x80 + ((cp >> 6) & 0x3f));
  45                         *p++ = (unsigned char) (0x80 + (cp & 0x3f));
  46                 }
  47         }
  48
  49         duk_push_lstring(ctx, (const char *) tmp, (duk_size_t) (p - tmp));
  50
  51         /* [ ... tmp res ] */
  52
  53         duk_remove(ctx, -2);
  54 }