]> git.proxmox.com Git - mirror_edk2.git/blobdiff - AppPkg/Applications/Python/Python-2.7.10/Modules/cjkcodecs/_codecs_iso2022.c
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 2/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Modules / cjkcodecs / _codecs_iso2022.c
diff --git a/AppPkg/Applications/Python/Python-2.7.10/Modules/cjkcodecs/_codecs_iso2022.c b/AppPkg/Applications/Python/Python-2.7.10/Modules/cjkcodecs/_codecs_iso2022.c
new file mode 100644 (file)
index 0000000..14fc643
--- /dev/null
@@ -0,0 +1,1131 @@
+/*\r
+ * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.\r
+ *\r
+ * Written by Hye-Shik Chang <perky@FreeBSD.org>\r
+ */\r
+\r
+#define USING_IMPORTED_MAPS\r
+#define USING_BINARY_PAIR_SEARCH\r
+#define EXTERN_JISX0213_PAIR\r
+#define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE\r
+#define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE\r
+\r
+#include "cjkcodecs.h"\r
+#include "alg_jisx0201.h"\r
+#include "emu_jisx0213_2000.h"\r
+#include "mappings_jisx0213_pair.h"\r
+\r
+/* STATE\r
+\r
+   state->c[0-3]\r
+\r
+    00000000\r
+    ||^^^^^|\r
+    |+-----+----  G0-3 Character Set\r
+    +-----------  Is G0-3 double byte?\r
+\r
+   state->c[4]\r
+\r
+    00000000\r
+          ||\r
+          |+----  Locked-Shift?\r
+          +-----  ESC Throughout\r
+*/\r
+\r
+/* Control bytes that the encoder and decoder below handle explicitly. */\r
+#define ESC                     0x1B\r
+#define SO                      0x0E\r
+#define SI                      0x0F\r
+#define LF                      0x0A\r
+\r
+/* Upper bound on the number of bytes iso2022processesc() scans while looking\r
+ * for the byte that ends an escape sequence. */\r
+#define MAX_ESCSEQLEN           16\r
+\r
+/* Single-byte character set marks.  The value is the final byte written after\r
+ * ESC '(' or ESC ')' when the set is designated. */\r
+#define CHARSET_ISO8859_1       'A'\r
+#define CHARSET_ASCII           'B'\r
+#define CHARSET_ISO8859_7       'F'\r
+#define CHARSET_JISX0201_K      'I'\r
+#define CHARSET_JISX0201_R      'J'\r
+\r
+/* Double-byte character set marks: the final byte of the designation sequence\r
+ * with the CHARSET_DBCS bit set, so that e.g. GB 2312 ('A') does not collide\r
+ * with ISO 8859-1 ('A'). */\r
+#define CHARSET_GB2312          ('A'|CHARSET_DBCS)\r
+#define CHARSET_JISX0208        ('B'|CHARSET_DBCS)\r
+#define CHARSET_KSX1001         ('C'|CHARSET_DBCS)\r
+#define CHARSET_JISX0212        ('D'|CHARSET_DBCS)\r
+#define CHARSET_GB2312_8565     ('E'|CHARSET_DBCS)\r
+#define CHARSET_CNS11643_1      ('G'|CHARSET_DBCS)\r
+#define CHARSET_CNS11643_2      ('H'|CHARSET_DBCS)\r
+#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)\r
+#define CHARSET_JISX0213_2      ('P'|CHARSET_DBCS)\r
+#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)\r
+#define CHARSET_JISX0208_O      ('@'|CHARSET_DBCS)\r
+\r
+/* Flag bit marking a double-byte set; ESCMARK() strips it again to recover\r
+ * the byte that is actually written in the escape sequence. */\r
+#define CHARSET_DBCS            0x80\r
+#define ESCMARK(mark)           ((mark) & 0x7f)\r
+\r
+/* IS_ESCEND: true for the bytes 'A'..'Z' and '@', which end an escape\r
+ * sequence.  IS_ISO2022ESC: true when the byte following ESC starts one of\r
+ * the escape sequences this codec parses. */\r
+#define IS_ESCEND(c)    (((c) >= 'A' && (c) <= 'Z') || (c) == '@')\r
+#define IS_ISO2022ESC(c2) \\r
+        ((c2) == '(' || (c2) == ')' || (c2) == '$' || \\r
+         (c2) == '.' || (c2) == '&')\r
+    /* This is not a complete list of ISO-2022 escape sequence headers,\r
+     * but it is enough to implement the CJK instances of ISO-2022. */\r
+\r
+/* Sentinel results of the per-charset encoder/decoder callbacks:\r
+ * MAP_UNMAPPABLE means "no mapping"; MAP_MULTIPLE_AVAIL is returned by an\r
+ * encoder to ask the caller to retry with a two-character pair. */\r
+#define MAP_UNMAPPABLE          0xFFFF\r
+#define MAP_MULTIPLE_AVAIL      0xFFFE /* for JIS X 0213 */\r
+\r
+/* Bits kept in state->c[4]. */\r
+#define F_SHIFTED               0x01\r
+#define F_ESCTHROUGHOUT         0x02\r
+\r
+/* Accessors for the designation slots state->c[0..3] (G0..G3).\r
+ * They expect a variable named `state` to be in scope.\r
+ * NOTE: the setter macros already end in ';' and are invoked throughout this\r
+ * file without a trailing semicolon. */\r
+#define STATE_SETG(dn, v)       ((state)->c[dn]) = (v);\r
+#define STATE_GETG(dn)          ((state)->c[dn])\r
+\r
+#define STATE_G0                STATE_GETG(0)\r
+#define STATE_G1                STATE_GETG(1)\r
+#define STATE_G2                STATE_GETG(2)\r
+#define STATE_G3                STATE_GETG(3)\r
+#define STATE_SETG0(v)          STATE_SETG(0, v)\r
+#define STATE_SETG1(v)          STATE_SETG(1, v)\r
+#define STATE_SETG2(v)          STATE_SETG(2, v)\r
+#define STATE_SETG3(v)          STATE_SETG(3, v)\r
+\r
+/* Accessors for the flag byte state->c[4] (F_SHIFTED, F_ESCTHROUGHOUT).\r
+ * The set/clear macros also carry their own trailing ';'. */\r
+#define STATE_SETFLAG(f)        ((state)->c[4]) |= (f);\r
+#define STATE_GETFLAG(f)        ((state)->c[4] & (f))\r
+#define STATE_CLEARFLAG(f)      ((state)->c[4]) &= ~(f);\r
+#define STATE_CLEARFLAGS()      ((state)->c[4]) = 0;\r
+\r
+/* View the opaque `config` pointer (expected to be in scope) as the\r
+ * per-encoding configuration, and read its flags and designation table. */\r
+#define ISO2022_CONFIG          ((const struct iso2022_config *)config)\r
+#define CONFIG_ISSET(flag)      (ISO2022_CONFIG->flags & (flag))\r
+#define CONFIG_DESIGNATIONS     (ISO2022_CONFIG->designations)\r
+\r
+/* iso2022_config.flags */\r
+/* NO_SHIFT: the decoder passes SI/SO through instead of switching G0/G1.\r
+ * USE_G2: ESC '.' designations and ESC 'N' (SS2) are accepted.\r
+ * USE_JISX0208_EXT: the ESC '&' '@' ESC '$' 'B' prefixed form is accepted. */\r
+#define NO_SHIFT                0x01\r
+#define USE_G2                  0x02\r
+#define USE_JISX0208_EXT        0x04\r
+\r
+/*-*- internal data structures -*-*/\r
+\r
+/* Callback types stored in a designation entry:\r
+ *  - init:   returns 0 on success, non-zero on failure (see CODEC_INIT).\r
+ *  - decode: maps the bytes at `data` to a code point or MAP_UNMAPPABLE.\r
+ *  - encode: maps the character(s) at `data` to a byte code, MAP_UNMAPPABLE\r
+ *            or MAP_MULTIPLE_AVAIL; `*length` is the number of input\r
+ *            characters offered and may be updated by the callback. */\r
+typedef int (*iso2022_init_func)(void);\r
+typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);\r
+typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);\r
+\r
+/* One character set an encoding may designate. */\r
+struct iso2022_designation {\r
+    unsigned char mark;             /* CHARSET_* value; 0 ends a table */\r
+    unsigned char plane;            /* 0 = G0, 1 = G1 (others unsupported by\r
+                                     * the encoder) */\r
+    unsigned char width;            /* bytes per character: 1 or 2 */\r
+    iso2022_init_func initializer;  /* may be NULL */\r
+    iso2022_decode_func decoder;\r
+    iso2022_encode_func encoder;\r
+};\r
+\r
+/* Per-encoding configuration reached through the codec's `config` pointer. */\r
+struct iso2022_config {\r
+    int flags;                      /* NO_SHIFT | USE_G2 | USE_JISX0208_EXT */\r
+    const struct iso2022_designation *designations; /* non-ascii desigs */\r
+};\r
+\r
+/*-*- iso-2022 codec implementation -*-*/\r
+\r
+/* Run the initializer of every designation configured for this codec.\r
+ * Returns -1 as soon as one initializer reports failure, 0 otherwise. */\r
+CODEC_INIT(iso2022)\r
+{\r
+    const struct iso2022_designation *entry = CONFIG_DESIGNATIONS;\r
+\r
+    /* The table ends at the first entry whose mark is zero. */\r
+    while (entry->mark) {\r
+        if (entry->initializer != NULL) {\r
+            if (entry->initializer() != 0)\r
+                return -1;\r
+        }\r
+        entry++;\r
+    }\r
+    return 0;\r
+}\r
+\r
+/* Put the encoder state into its initial condition: ASCII designated to both\r
+ * G0 and G1, and every flag cleared. */\r
+ENCODER_INIT(iso2022)\r
+{\r
+    STATE_SETG1(CHARSET_ASCII)\r
+    STATE_SETG0(CHARSET_ASCII)\r
+    STATE_CLEARFLAGS()\r
+    return 0;\r
+}\r
+\r
+ENCODER_RESET(iso2022)\r
+{\r
+    if (STATE_GETFLAG(F_SHIFTED)) {\r
+        WRITE1(SI)\r
+        NEXT_OUT(1)\r
+        STATE_CLEARFLAG(F_SHIFTED)\r
+    }\r
+    if (STATE_G0 != CHARSET_ASCII) {\r
+        WRITE3(ESC, '(', 'B')\r
+        NEXT_OUT(3)\r
+        STATE_SETG0(CHARSET_ASCII)\r
+    }\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * Encode the pending input characters.\r
+ *\r
+ * Characters below 0x80 are written through G0 as ASCII, re-designating\r
+ * ASCII and shifting in first when necessary.  Any other character is offered\r
+ * to the encoder of each configured designation, in table order; the first\r
+ * designation that maps it is designated to its plane (if not already) and\r
+ * the encoded byte(s) are written.\r
+ *\r
+ * Returns 0 when all input was consumed, 1 when no designation can encode the\r
+ * current character, MBERR_TOOFEW when a possible two-character pair needs\r
+ * more input and MBENC_FLUSH is not set, and MBERR_INTERNAL for a designation\r
+ * on a plane other than G0/G1.\r
+ * NOTE(review): the WRITEn()/NEXT*() macros are defined outside this file;\r
+ * presumably WRITEn() also returns early when output space runs out --\r
+ * confirm in cjkcodecs.h.\r
+ */\r
+ENCODER(iso2022)\r
+{\r
+    while (inleft > 0) {\r
+        const struct iso2022_designation *dsg;\r
+        DBCHAR encoded;\r
+        ucs4_t c = **inbuf;\r
+        Py_ssize_t insize;\r
+\r
+        /* ASCII fast path: make sure G0 is ASCII and we are shifted in. */\r
+        if (c < 0x80) {\r
+            if (STATE_G0 != CHARSET_ASCII) {\r
+                WRITE3(ESC, '(', 'B')\r
+                STATE_SETG0(CHARSET_ASCII)\r
+                NEXT_OUT(3)\r
+            }\r
+            if (STATE_GETFLAG(F_SHIFTED)) {\r
+                WRITE1(SI)\r
+                STATE_CLEARFLAG(F_SHIFTED)\r
+                NEXT_OUT(1)\r
+            }\r
+            WRITE1((unsigned char)c)\r
+            NEXT(1, 1)\r
+            continue;\r
+        }\r
+\r
+        DECODE_SURROGATE(c)\r
+        insize = GET_INSIZE(c);\r
+\r
+        /* Find the first designation whose encoder accepts the character. */\r
+        encoded = MAP_UNMAPPABLE;\r
+        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {\r
+            Py_ssize_t length = 1;\r
+            encoded = dsg->encoder(&c, &length);\r
+            if (encoded == MAP_MULTIPLE_AVAIL) {\r
+                /* this implementation won't work for pair\r
+                 * of non-bmp characters. */\r
+                if (inleft < 2) {\r
+                    /* No second character available: wait for more input\r
+                     * unless flushing, in which case encode it alone\r
+                     * (length -1 selects the "flush" path of the encoder). */\r
+                    if (!(flags & MBENC_FLUSH))\r
+                        return MBERR_TOOFEW;\r
+                    length = -1;\r
+                }\r
+                else\r
+                    length = 2;\r
+#if Py_UNICODE_SIZE == 2\r
+                /* With 16-bit input units the pair is copied into a\r
+                 * ucs4_t array before it is handed to the encoder. */\r
+                if (length == 2) {\r
+                    ucs4_t u4in[2];\r
+                    u4in[0] = (ucs4_t)IN1;\r
+                    u4in[1] = (ucs4_t)IN2;\r
+                    encoded = dsg->encoder(u4in, &length);\r
+                } else\r
+                    encoded = dsg->encoder(&c, &length);\r
+#else\r
+                encoded = dsg->encoder(&c, &length);\r
+#endif\r
+                if (encoded != MAP_UNMAPPABLE) {\r
+                    /* The encoder reports through `length` how many input\r
+                     * characters it actually consumed. */\r
+                    insize = length;\r
+                    break;\r
+                }\r
+            }\r
+            else if (encoded != MAP_UNMAPPABLE)\r
+                break;\r
+        }\r
+\r
+        /* Reached the table terminator: nothing can encode this character. */\r
+        if (!dsg->mark)\r
+            return 1;\r
+        assert(dsg->width == 1 || dsg->width == 2);\r
+\r
+        /* Emit the shift/designation sequences required by the chosen set. */\r
+        switch (dsg->plane) {\r
+        case 0: /* G0 */\r
+            if (STATE_GETFLAG(F_SHIFTED)) {\r
+                WRITE1(SI)\r
+                STATE_CLEARFLAG(F_SHIFTED)\r
+                NEXT_OUT(1)\r
+            }\r
+            if (STATE_G0 != dsg->mark) {\r
+                if (dsg->width == 1) {\r
+                    WRITE3(ESC, '(', ESCMARK(dsg->mark))\r
+                    STATE_SETG0(dsg->mark)\r
+                    NEXT_OUT(3)\r
+                }\r
+                else if (dsg->mark == CHARSET_JISX0208) {\r
+                    /* JIS X 0208 uses the short form ESC $ B. */\r
+                    WRITE3(ESC, '$', ESCMARK(dsg->mark))\r
+                    STATE_SETG0(dsg->mark)\r
+                    NEXT_OUT(3)\r
+                }\r
+                else {\r
+                    WRITE4(ESC, '$', '(',\r
+                        ESCMARK(dsg->mark))\r
+                    STATE_SETG0(dsg->mark)\r
+                    NEXT_OUT(4)\r
+                }\r
+            }\r
+            break;\r
+        case 1: /* G1 */\r
+            if (STATE_G1 != dsg->mark) {\r
+                if (dsg->width == 1) {\r
+                    WRITE3(ESC, ')', ESCMARK(dsg->mark))\r
+                    STATE_SETG1(dsg->mark)\r
+                    NEXT_OUT(3)\r
+                }\r
+                else {\r
+                    WRITE4(ESC, '$', ')',\r
+                        ESCMARK(dsg->mark))\r
+                    STATE_SETG1(dsg->mark)\r
+                    NEXT_OUT(4)\r
+                }\r
+            }\r
+            /* G1 is only active while shifted out. */\r
+            if (!STATE_GETFLAG(F_SHIFTED)) {\r
+                WRITE1(SO)\r
+                STATE_SETFLAG(F_SHIFTED)\r
+                NEXT_OUT(1)\r
+            }\r
+            break;\r
+        default: /* G2 and G3 is not supported: no encoding in\r
+                  * CJKCodecs are using them yet */\r
+            return MBERR_INTERNAL;\r
+        }\r
+\r
+        /* Finally write the encoded character itself. */\r
+        if (dsg->width == 1) {\r
+            WRITE1((unsigned char)encoded)\r
+            NEXT_OUT(1)\r
+        }\r
+        else {\r
+            WRITE2(encoded >> 8, encoded & 0xff)\r
+            NEXT_OUT(2)\r
+        }\r
+        NEXT_IN(insize)\r
+    }\r
+\r
+    return 0;\r
+}\r
+\r
+/* Put the decoder state into its initial condition: ASCII designated to G0,\r
+ * G1 and G2, and every flag cleared. */\r
+DECODER_INIT(iso2022)\r
+{\r
+    STATE_SETG2(CHARSET_ASCII)\r
+    STATE_SETG1(CHARSET_ASCII)\r
+    STATE_SETG0(CHARSET_ASCII)\r
+    STATE_CLEARFLAGS()\r
+    return 0;\r
+}\r
+\r
+/* Reset the decoder: shift back in and designate ASCII to G0.  The G1/G2\r
+ * designations and the ESC-throughout flag are left as they are. */\r
+DECODER_RESET(iso2022)\r
+{\r
+    STATE_CLEARFLAG(F_SHIFTED)\r
+    STATE_SETG0(CHARSET_ASCII)\r
+    return 0;\r
+}\r
+\r
+static Py_ssize_t\r
+iso2022processesc(const void *config, MultibyteCodec_State *state,\r
+                  const unsigned char **inbuf, Py_ssize_t *inleft)\r
+{\r
+    unsigned char charset, designation;\r
+    Py_ssize_t i, esclen;\r
+\r
+    for (i = 1;i < MAX_ESCSEQLEN;i++) {\r
+        if (i >= *inleft)\r
+            return MBERR_TOOFEW;\r
+        if (IS_ESCEND((*inbuf)[i])) {\r
+            esclen = i + 1;\r
+            break;\r
+        }\r
+        else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&\r
+                 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')\r
+            i += 2;\r
+    }\r
+\r
+    if (i >= MAX_ESCSEQLEN)\r
+        return 1; /* unterminated escape sequence */\r
+\r
+    switch (esclen) {\r
+    case 3:\r
+        if (IN2 == '$') {\r
+            charset = IN3 | CHARSET_DBCS;\r
+            designation = 0;\r
+        }\r
+        else {\r
+            charset = IN3;\r
+            if (IN2 == '(') designation = 0;\r
+            else if (IN2 == ')') designation = 1;\r
+            else if (CONFIG_ISSET(USE_G2) && IN2 == '.')\r
+                designation = 2;\r
+            else return 3;\r
+        }\r
+        break;\r
+    case 4:\r
+        if (IN2 != '$')\r
+            return 4;\r
+\r
+        charset = IN4 | CHARSET_DBCS;\r
+        if (IN3 == '(') designation = 0;\r
+        else if (IN3 == ')') designation = 1;\r
+        else return 4;\r
+        break;\r
+    case 6: /* designation with prefix */\r
+        if (CONFIG_ISSET(USE_JISX0208_EXT) &&\r
+            (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&\r
+            (*inbuf)[5] == 'B') {\r
+            charset = 'B' | CHARSET_DBCS;\r
+            designation = 0;\r
+        }\r
+        else\r
+            return 6;\r
+        break;\r
+    default:\r
+        return esclen;\r
+    }\r
+\r
+    /* raise error when the charset is not designated for this encoding */\r
+    if (charset != CHARSET_ASCII) {\r
+        const struct iso2022_designation *dsg;\r
+\r
+        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)\r
+            if (dsg->mark == charset)\r
+                break;\r
+        if (!dsg->mark)\r
+            return esclen;\r
+    }\r
+\r
+    STATE_SETG(designation, charset)\r
+    *inleft -= esclen;\r
+    (*inbuf) += esclen;\r
+    return 0;\r
+}\r
+\r
+#define ISO8859_7_DECODE(c, assi)                                       \\r
+    if ((c) < 0xa0) (assi) = (c);                                       \\r
+    else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0))))          \\r
+        (assi) = (c);                                                   \\r
+    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||              \\r
+             (0xbffffd77L & (1L << ((c)-0xb4)))))                       \\r
+        (assi) = 0x02d0 + (c);                                          \\r
+    else if ((c) == 0xa1) (assi) = 0x2018;                              \\r
+    else if ((c) == 0xa2) (assi) = 0x2019;                              \\r
+    else if ((c) == 0xaf) (assi) = 0x2015;\r
+\r
+static Py_ssize_t\r
+iso2022processg2(const void *config, MultibyteCodec_State *state,\r
+                 const unsigned char **inbuf, Py_ssize_t *inleft,\r
+                 Py_UNICODE **outbuf, Py_ssize_t *outleft)\r
+{\r
+    /* not written to use encoder, decoder functions because only few\r
+     * encodings use G2 designations in CJKCodecs */\r
+    if (STATE_G2 == CHARSET_ISO8859_1) {\r
+        if (IN3 < 0x80)\r
+            OUT1(IN3 + 0x80)\r
+        else\r
+            return 3;\r
+    }\r
+    else if (STATE_G2 == CHARSET_ISO8859_7) {\r
+        ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)\r
+        else return 3;\r
+    }\r
+    else if (STATE_G2 == CHARSET_ASCII) {\r
+        if (IN3 & 0x80) return 3;\r
+        else **outbuf = IN3;\r
+    }\r
+    else\r
+        return MBERR_INTERNAL;\r
+\r
+    (*inbuf) += 3;\r
+    *inleft -= 3;\r
+    (*outbuf) += 1;\r
+    *outleft -= 1;\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * Decode the pending input bytes.\r
+ *\r
+ * ESC starts either a designation sequence (iso2022processesc), a single\r
+ * shift to G2 (iso2022processg2, only with USE_G2) or an unrecognised escape\r
+ * sequence that is copied through byte by byte.  SI/SO select G0/G1 unless\r
+ * NO_SHIFT is configured; LF also shifts back in.  Other C0 controls are\r
+ * copied unchanged, bytes >= 0x80 are rejected, and the remaining bytes are\r
+ * decoded with the character set of the active plane.\r
+ *\r
+ * Returns 0 when all input was consumed; otherwise the non-zero result of a\r
+ * helper, 1 for a byte >= 0x80, or the designation width when its decoder\r
+ * reports MAP_UNMAPPABLE.\r
+ * NOTE(review): REQUIRE_INBUF/WRITEn/WRITEUCS4/NEXT* are defined outside this\r
+ * file and presumably return early on short input or output -- confirm in\r
+ * cjkcodecs.h.\r
+ */\r
+DECODER(iso2022)\r
+{\r
+    /* Designation found by the most recent table scan; avoids rescanning\r
+     * while the active charset stays the same within this call. */\r
+    const struct iso2022_designation *dsgcache = NULL;\r
+\r
+    while (inleft > 0) {\r
+        unsigned char c = IN1;\r
+        Py_ssize_t err;\r
+\r
+        if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {\r
+            /* ESC throughout mode:\r
+             * for non-iso2022 escape sequences */\r
+            WRITE1(c) /* assume as ISO-8859-1 */\r
+            NEXT(1, 1)\r
+            /* The mode ends with the byte that terminates the sequence. */\r
+            if (IS_ESCEND(c)) {\r
+                STATE_CLEARFLAG(F_ESCTHROUGHOUT)\r
+            }\r
+            continue;\r
+        }\r
+\r
+        switch (c) {\r
+        case ESC:\r
+            REQUIRE_INBUF(2)\r
+            if (IS_ISO2022ESC(IN2)) {\r
+                err = iso2022processesc(config, state,\r
+                                        inbuf, &inleft);\r
+                if (err != 0)\r
+                    return err;\r
+            }\r
+            else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */\r
+                REQUIRE_INBUF(3)\r
+                err = iso2022processg2(config, state,\r
+                    inbuf, &inleft, outbuf, &outleft);\r
+                if (err != 0)\r
+                    return err;\r
+            }\r
+            else {\r
+                /* Unknown escape sequence: copy it through verbatim. */\r
+                WRITE1(ESC)\r
+                STATE_SETFLAG(F_ESCTHROUGHOUT)\r
+                NEXT(1, 1)\r
+            }\r
+            break;\r
+        case SI:\r
+            if (CONFIG_ISSET(NO_SHIFT))\r
+                goto bypass;\r
+            STATE_CLEARFLAG(F_SHIFTED)\r
+            NEXT_IN(1)\r
+            break;\r
+        case SO:\r
+            if (CONFIG_ISSET(NO_SHIFT))\r
+                goto bypass;\r
+            STATE_SETFLAG(F_SHIFTED)\r
+            NEXT_IN(1)\r
+            break;\r
+        case LF:\r
+            /* A line feed also shifts back in to G0. */\r
+            STATE_CLEARFLAG(F_SHIFTED)\r
+            WRITE1(LF)\r
+            NEXT(1, 1)\r
+            break;\r
+        default:\r
+            if (c < 0x20) /* C0 */\r
+                goto bypass;\r
+            else if (c >= 0x80)\r
+                return 1;\r
+            else {\r
+                const struct iso2022_designation *dsg;\r
+                unsigned char charset;\r
+                ucs4_t decoded;\r
+\r
+                /* Pick the charset of the active plane. */\r
+                if (STATE_GETFLAG(F_SHIFTED))\r
+                    charset = STATE_G1;\r
+                else\r
+                    charset = STATE_G0;\r
+\r
+                if (charset == CHARSET_ASCII) {\r
+                    /* `bypass` is also the target for bytes that are\r
+                     * copied to the output unchanged. */\r
+bypass:                                 WRITE1(c)\r
+                                        NEXT(1, 1)\r
+                                        break;\r
+                                }\r
+\r
+                                if (dsgcache != NULL &&\r
+                                    dsgcache->mark == charset)\r
+                                        dsg = dsgcache;\r
+                                else {\r
+                                        /* NOTE(review): without Py_DEBUG\r
+                                         * this scan has no end-of-table\r
+                                         * test; it relies on `charset`\r
+                                         * always matching an entry. */\r
+                                        for (dsg = CONFIG_DESIGNATIONS;\r
+                                             dsg->mark != charset\r
+#ifdef Py_DEBUG\r
+                                                && dsg->mark != '\0'\r
+#endif\r
+                                             ;dsg++)\r
+                                                /* noop */;\r
+                                        assert(dsg->mark != '\0');\r
+                                        dsgcache = dsg;\r
+                                }\r
+\r
+                                REQUIRE_INBUF(dsg->width)\r
+                                decoded = dsg->decoder(*inbuf);\r
+                                if (decoded == MAP_UNMAPPABLE)\r
+                                        return dsg->width;\r
+\r
+                                /* The magnitude of the decoder result\r
+                                 * selects how it is written out. */\r
+                                if (decoded < 0x10000) {\r
+                                        WRITE1(decoded)\r
+                                        NEXT_OUT(1)\r
+                                }\r
+                                else if (decoded < 0x30000) {\r
+                                        WRITEUCS4(decoded)\r
+                                }\r
+                                else { /* JIS X 0213 pairs */\r
+                    /* Two characters packed as high/low 16 bits. */\r
+                    WRITE2(decoded >> 16, decoded & 0xffff)\r
+                    NEXT_OUT(2)\r
+                }\r
+                NEXT_IN(dsg->width)\r
+            }\r
+            break;\r
+        }\r
+    }\r
+    return 0;\r
+}\r
+\r
+/*-*- mapping table holders -*-*/\r
+\r
+/* Declare the file-local pointer `<enc>_encmap` / `<enc>_decmap`, initially\r
+ * NULL.  The *_init() functions below pass their addresses to IMPORT_MAP,\r
+ * and the TRYMAP_* macros name the tables by the same `<enc>` prefix. */\r
+#define ENCMAP(enc) static const encode_map *enc##_encmap = NULL;\r
+#define DECMAP(enc) static const decode_map *enc##_decmap = NULL;\r
+\r
+/* kr */\r
+ENCMAP(cp949)\r
+DECMAP(ksx1001)\r
+\r
+/* jp */\r
+ENCMAP(jisxcommon)\r
+DECMAP(jisx0208)\r
+DECMAP(jisx0212)\r
+ENCMAP(jisx0213_bmp)\r
+DECMAP(jisx0213_1_bmp)\r
+DECMAP(jisx0213_2_bmp)\r
+ENCMAP(jisx0213_emp)\r
+DECMAP(jisx0213_1_emp)\r
+DECMAP(jisx0213_2_emp)\r
+\r
+/* cn */\r
+ENCMAP(gbcommon)\r
+DECMAP(gb2312)\r
+\r
+/* tw */\r
+\r
+/*-*- mapping access functions -*-*/\r
+\r
+static int\r
+ksx1001_init(void)\r
+{\r
+    static int initialized = 0;\r
+\r
+    if (!initialized && (\r
+                    IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||\r
+                    IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))\r
+        return -1;\r
+    initialized = 1;\r
+    return 0;\r
+}\r
+\r
+/* Look up the two bytes at `data` in the ksx1001 decode map.  Returns the\r
+ * mapped code point, or MAP_UNMAPPABLE when the lookup fails. */\r
+static ucs4_t\r
+ksx1001_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t ucs;\r
+\r
+    TRYMAP_DEC(ksx1001, ucs, data[0], data[1]) {\r
+        return ucs;\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+static DBCHAR\r
+ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    assert(*length == 1);\r
+    if (*data < 0x10000) {\r
+        TRYMAP_ENC(cp949, coded, *data)\r
+            if (!(coded & 0x8000))\r
+                return coded;\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+static int\r
+jisx0208_init(void)\r
+{\r
+    static int initialized = 0;\r
+\r
+    if (!initialized && (\r
+                    IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||\r
+                    IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))\r
+        return -1;\r
+    initialized = 1;\r
+    return 0;\r
+}\r
+\r
+static ucs4_t\r
+jisx0208_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t u;\r
+    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */\r
+        return 0xff3c;\r
+    else TRYMAP_DEC(jisx0208, u, data[0], data[1])\r
+        return u;\r
+    else\r
+        return MAP_UNMAPPABLE;\r
+}\r
+\r
+static DBCHAR\r
+jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    assert(*length == 1);\r
+    if (*data < 0x10000) {\r
+        if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */\r
+            return 0x2140;\r
+        else TRYMAP_ENC(jisxcommon, coded, *data) {\r
+            if (!(coded & 0x8000))\r
+                return coded;\r
+        }\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+static int\r
+jisx0212_init(void)\r
+{\r
+    static int initialized = 0;\r
+\r
+    if (!initialized && (\r
+                    IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||\r
+                    IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))\r
+        return -1;\r
+    initialized = 1;\r
+    return 0;\r
+}\r
+\r
+/* Look up the two bytes at `data` in the jisx0212 decode map.  Returns the\r
+ * mapped code point, or MAP_UNMAPPABLE when the lookup fails. */\r
+static ucs4_t\r
+jisx0212_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t ucs;\r
+\r
+    TRYMAP_DEC(jisx0212, ucs, data[0], data[1]) {\r
+        return ucs;\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+static DBCHAR\r
+jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    assert(*length == 1);\r
+    if (*data < 0x10000) {\r
+        TRYMAP_ENC(jisxcommon, coded, *data) {\r
+            if (coded & 0x8000)\r
+                return coded & 0x7fff;\r
+        }\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+static int\r
+jisx0213_init(void)\r
+{\r
+    static int initialized = 0;\r
+\r
+    if (!initialized && (\r
+                    jisx0208_init() ||\r
+                    IMPORT_MAP(jp, jisx0213_bmp,\r
+                               &jisx0213_bmp_encmap, NULL) ||\r
+                    IMPORT_MAP(jp, jisx0213_1_bmp,\r
+                               NULL, &jisx0213_1_bmp_decmap) ||\r
+                    IMPORT_MAP(jp, jisx0213_2_bmp,\r
+                               NULL, &jisx0213_2_bmp_decmap) ||\r
+                    IMPORT_MAP(jp, jisx0213_emp,\r
+                               &jisx0213_emp_encmap, NULL) ||\r
+                    IMPORT_MAP(jp, jisx0213_1_emp,\r
+                               NULL, &jisx0213_1_emp_decmap) ||\r
+                    IMPORT_MAP(jp, jisx0213_2_emp,\r
+                               NULL, &jisx0213_2_emp_decmap) ||\r
+                    IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,\r
+                               &jisx0213_pair_decmap)))\r
+        return -1;\r
+    initialized = 1;\r
+    return 0;\r
+}\r
+\r
+/* `config` is defined as a constant while the two JIS X 0213:2000 decoders\r
+ * are compiled and undefined again after them.  Presumably the\r
+ * EMULATE_JISX0213_2000_* macros (not defined in this file) test an\r
+ * identifier of that name -- confirm in emu_jisx0213_2000.h. */\r
+#define config ((void *)2000)\r
+/* Decode the two bytes at `data` as JIS X 0213:2000 plane 1.\r
+ * After the 2000-edition emulation macro, the pair 0x21 0x40 maps to U+FF3C;\r
+ * otherwise the jisx0208, jisx0213_1_bmp, jisx0213_1_emp (result OR-ed with\r
+ * 0x20000) and jisx0213_pair decode maps are tried in that order.\r
+ * Returns MAP_UNMAPPABLE when nothing matches. */\r
+static ucs4_t\r
+jisx0213_2000_1_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t u;\r
+    /* The emulation macro opens the if/else chain continued below. */\r
+    EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])\r
+    else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */\r
+        return 0xff3c;\r
+    /* A lone ';' after TRYMAP_DEC is its (empty) body: on a hit `u` is\r
+     * already set and control falls through to `return u`. */\r
+    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);\r
+    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);\r
+    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])\r
+        u |= 0x20000;\r
+    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);\r
+    else\r
+        return MAP_UNMAPPABLE;\r
+    return u;\r
+}\r
+\r
+static ucs4_t\r
+jisx0213_2000_2_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t u;\r
+    EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])\r
+    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);\r
+    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])\r
+        u |= 0x20000;\r
+    else\r
+        return MAP_UNMAPPABLE;\r
+    return u;\r
+}\r
+#undef config\r
+\r
+static ucs4_t\r
+jisx0213_2004_1_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t u;\r
+    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */\r
+        return 0xff3c;\r
+    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);\r
+    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);\r
+    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])\r
+        u |= 0x20000;\r
+    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);\r
+    else\r
+        return MAP_UNMAPPABLE;\r
+    return u;\r
+}\r
+\r
+/* Decode the two bytes at `data` as JIS X 0213:2004 plane 2 by trying the\r
+ * jisx0213_2_bmp map and then the jisx0213_2_emp map (result OR-ed with\r
+ * 0x20000).  Returns MAP_UNMAPPABLE when neither lookup succeeds. */\r
+static ucs4_t\r
+jisx0213_2004_2_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t ucs;\r
+\r
+    TRYMAP_DEC(jisx0213_2_bmp, ucs, data[0], data[1]) {\r
+        return ucs;\r
+    }\r
+    TRYMAP_DEC(jisx0213_2_emp, ucs, data[0], data[1]) {\r
+        ucs |= 0x20000;\r
+        return ucs;\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+/*\r
+ * Shared JIS X 0213 encoder behind the per-plane/per-edition wrappers.\r
+ *\r
+ * data    - one character (*length == 1 or -1) or a pair (*length == 2).\r
+ * length  - in: 1 for a first attempt, 2 to encode data[0] and data[1] as a\r
+ *           pair, -1 to encode data[0] alone when no second character will\r
+ *           come.  out: set to 1 when only data[0] was encoded by the pair\r
+ *           lookups.\r
+ * config  - the wrappers pass (void *)2000 for the 2000 edition and NULL for\r
+ *           2004; presumably read by the EMULATE_JISX0213_2000_* macros\r
+ *           (defined outside this file) -- confirm there.\r
+ *\r
+ * Returns the raw table code (callers interpret the 0x8000 bit),\r
+ * MAP_MULTIPLE_AVAIL when the character should be retried as a pair, or\r
+ * MAP_UNMAPPABLE.\r
+ */\r
+static DBCHAR\r
+jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)\r
+{\r
+    DBCHAR coded;\r
+\r
+    switch (*length) {\r
+    case 1: /* first character */\r
+        if (*data >= 0x10000) {\r
+            /* Only characters in the 0x2xxxx range are looked up, using\r
+             * their low 16 bits as the key. */\r
+            if ((*data) >> 16 == 0x20000 >> 16) {\r
+                EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)\r
+                else TRYMAP_ENC(jisx0213_emp, coded,\r
+                                (*data) & 0xffff)\r
+                    return coded;\r
+            }\r
+            return MAP_UNMAPPABLE;\r
+        }\r
+\r
+        /* The emulation macro opens the if/else chain continued below. */\r
+        EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)\r
+        else TRYMAP_ENC(jisx0213_bmp, coded, *data) {\r
+            /* MULTIC marks a character that may begin a pair. */\r
+            if (coded == MULTIC)\r
+                return MAP_MULTIPLE_AVAIL;\r
+        }\r
+        else TRYMAP_ENC(jisxcommon, coded, *data) {\r
+            if (coded & 0x8000)\r
+                return MAP_UNMAPPABLE;\r
+        }\r
+        else\r
+            return MAP_UNMAPPABLE;\r
+        return coded;\r
+    case 2: /* second character of unicode pair */\r
+        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],\r
+                        jisx0213_pair_encmap, JISX0213_ENCPAIRS);\r
+        if (coded == DBCINV) {\r
+            /* No such pair: fall back to encoding data[0] on its own. */\r
+            *length = 1;\r
+            coded = find_pairencmap((ucs2_t)data[0], 0,\r
+                      jisx0213_pair_encmap, JISX0213_ENCPAIRS);\r
+            if (coded == DBCINV)\r
+                return MAP_UNMAPPABLE;\r
+        }\r
+        else\r
+            return coded;\r
+        /* fall through: repeats the single-character lookup that just\r
+         * succeeded and returns its result */\r
+    case -1: /* flush unterminated */\r
+        *length = 1;\r
+        coded = find_pairencmap((ucs2_t)data[0], 0,\r
+                        jisx0213_pair_encmap, JISX0213_ENCPAIRS);\r
+        if (coded == DBCINV)\r
+            return MAP_UNMAPPABLE;\r
+        else\r
+            return coded;\r
+    default:\r
+        return MAP_UNMAPPABLE;\r
+    }\r
+}\r
+\r
+static DBCHAR\r
+jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);\r
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
+        return coded;\r
+    else if (coded & 0x8000)\r
+        return MAP_UNMAPPABLE;\r
+    else\r
+        return coded;\r
+}\r
+\r
+static DBCHAR\r
+jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    Py_ssize_t ilength = *length;\r
+\r
+    coded = jisx0213_encoder(data, length, (void *)2000);\r
+    switch (ilength) {\r
+    case 1:\r
+        if (coded == MAP_MULTIPLE_AVAIL)\r
+            return MAP_MULTIPLE_AVAIL;\r
+        else\r
+            return MAP_UNMAPPABLE;\r
+    case 2:\r
+        if (*length != 2)\r
+            return MAP_UNMAPPABLE;\r
+        else\r
+            return coded;\r
+    default:\r
+        return MAP_UNMAPPABLE;\r
+    }\r
+}\r
+\r
+static DBCHAR\r
+jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);\r
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
+        return coded;\r
+    else if (coded & 0x8000)\r
+        return coded & 0x7fff;\r
+    else\r
+        return MAP_UNMAPPABLE;\r
+}\r
+\r
+static DBCHAR\r
+jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded = jisx0213_encoder(data, length, NULL);\r
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
+        return coded;\r
+    else if (coded & 0x8000)\r
+        return MAP_UNMAPPABLE;\r
+    else\r
+        return coded;\r
+}\r
+\r
+static DBCHAR\r
+jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    Py_ssize_t ilength = *length;\r
+\r
+    coded = jisx0213_encoder(data, length, NULL);\r
+    switch (ilength) {\r
+    case 1:\r
+        if (coded == MAP_MULTIPLE_AVAIL)\r
+            return MAP_MULTIPLE_AVAIL;\r
+        else\r
+            return MAP_UNMAPPABLE;\r
+    case 2:\r
+        if (*length != 2)\r
+            return MAP_UNMAPPABLE;\r
+        else\r
+            return coded;\r
+    default:\r
+        return MAP_UNMAPPABLE;\r
+    }\r
+}\r
+\r
+static DBCHAR\r
+jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded = jisx0213_encoder(data, length, NULL);\r
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)\r
+        return coded;\r
+    else if (coded & 0x8000)\r
+        return coded & 0x7fff;\r
+    else\r
+        return MAP_UNMAPPABLE;\r
+}\r
+\r
+/* Decode the single byte at `data` with JISX0201_R_DECODE.  Returns the\r
+ * decoded value, or MAP_UNMAPPABLE when the macro does not accept the byte.\r
+ * NOTE(review): the macro is defined outside this file (presumably in\r
+ * alg_jisx0201.h); it is used here as an `if` that takes a trailing else. */\r
+static ucs4_t\r
+jisx0201_r_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t u;\r
+    JISX0201_R_DECODE(*data, u)\r
+    else return MAP_UNMAPPABLE;\r
+    return u;\r
+}\r
+\r
+/* Encode the character at `data` with JISX0201_R_ENCODE.  Returns the code,\r
+ * or MAP_UNMAPPABLE when the macro does not accept the character.  `length`\r
+ * is not used.\r
+ * NOTE(review): the macro is defined outside this file (presumably in\r
+ * alg_jisx0201.h); it is used here as an `if` that takes a trailing else. */\r
+static DBCHAR\r
+jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    JISX0201_R_ENCODE(*data, coded)\r
+    else return MAP_UNMAPPABLE;\r
+    return coded;\r
+}\r
+\r
+/* Decode the single byte at `data` with JISX0201_K_DECODE.  The byte has its\r
+ * 0x80 bit toggled before it is handed to the macro (the decoder only passes\r
+ * bytes below 0x80 here, so this sets the bit).  Returns the decoded value,\r
+ * or MAP_UNMAPPABLE when the macro does not accept it.\r
+ * NOTE(review): the macro is defined outside this file (presumably in\r
+ * alg_jisx0201.h); it is used here as an `if` that takes a trailing else. */\r
+static ucs4_t\r
+jisx0201_k_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t u;\r
+    JISX0201_K_DECODE(*data ^ 0x80, u)\r
+    else return MAP_UNMAPPABLE;\r
+    return u;\r
+}\r
+\r
+/* Encode the character at `data` with JISX0201_K_ENCODE and return the code\r
+ * minus 0x80 (the counterpart of the bit toggled in jisx0201_k_decoder).\r
+ * Returns MAP_UNMAPPABLE when the macro does not accept the character.\r
+ * `length` is not used.\r
+ * NOTE(review): the macro is defined outside this file (presumably in\r
+ * alg_jisx0201.h); it is used here as an `if` that takes a trailing else. */\r
+static DBCHAR\r
+jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    JISX0201_K_ENCODE(*data, coded)\r
+    else return MAP_UNMAPPABLE;\r
+    return coded - 0x80;\r
+}\r
+\r
+static int\r
+gb2312_init(void)\r
+{\r
+    static int initialized = 0;\r
+\r
+    if (!initialized && (\r
+                    IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||\r
+                    IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))\r
+        return -1;\r
+    initialized = 1;\r
+    return 0;\r
+}\r
+\r
+/* Look up the two bytes at `data` in the gb2312 decode map.  Returns the\r
+ * mapped code point, or MAP_UNMAPPABLE when the lookup fails. */\r
+static ucs4_t\r
+gb2312_decoder(const unsigned char *data)\r
+{\r
+    ucs4_t ucs;\r
+\r
+    TRYMAP_DEC(gb2312, ucs, data[0], data[1]) {\r
+        return ucs;\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+static DBCHAR\r
+gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    DBCHAR coded;\r
+    assert(*length == 1);\r
+    if (*data < 0x10000) {\r
+        TRYMAP_ENC(gbcommon, coded, *data) {\r
+            if (!(coded & 0x8000))\r
+                return coded;\r
+        }\r
+    }\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+\r
+/* Placeholder decoder: maps nothing, always answers MAP_UNMAPPABLE. */\r
+static ucs4_t\r
+dummy_decoder(const unsigned char *data)\r
+{\r
+    (void)data; /* the input is never inspected */\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+/* Placeholder encoder: maps nothing, always answers MAP_UNMAPPABLE. */\r
+static DBCHAR\r
+dummy_encoder(const ucs4_t *data, Py_ssize_t *length)\r
+{\r
+    (void)data;   /* the input is never inspected */\r
+    (void)length; /* ...and the length is left untouched */\r
+    return MAP_UNMAPPABLE;\r
+}\r
+\r
+/*-*- registry tables -*-*/\r
+/* Each REGISTRY_* macro expands to a brace initializer for one entry of a\r
+ * struct iso2022_designation array: a CHARSET_* mark, two small integers,\r
+ * an init function (or NULL), a decoder and an encoder.\r
+ * NOTE(review): the first integer looks like the G0-G3 plane index and the\r
+ * second like the bytes per character (1 for JIS X 0201 / ISO 8859, 2 for\r
+ * the double-byte sets) -- confirm against struct iso2022_designation.\r
+ * REGISTRY_JISX0213_2000_2 and REGISTRY_JISX0213_2004_2 both use the\r
+ * CHARSET_JISX0213_2 mark.  REGISTRY_SENTINEL is a zero entry that ends\r
+ * every designation array in this file.  CONFIGDEF(var, attrs) defines\r
+ * iso2022_<var>_config from attrs and iso2022_<var>_designations. */\r
+#define REGISTRY_KSX1001_G0     { CHARSET_KSX1001, 0, 2,                \\r
+                  ksx1001_init,                                         \\r
+                  ksx1001_decoder, ksx1001_encoder }\r
+#define REGISTRY_KSX1001_G1     { CHARSET_KSX1001, 1, 2,                \\r
+                  ksx1001_init,                                         \\r
+                  ksx1001_decoder, ksx1001_encoder }\r
+#define REGISTRY_JISX0201_R     { CHARSET_JISX0201_R, 0, 1,             \\r
+                  NULL,                                                 \\r
+                  jisx0201_r_decoder, jisx0201_r_encoder }\r
+#define REGISTRY_JISX0201_K     { CHARSET_JISX0201_K, 0, 1,             \\r
+                  NULL,                                                 \\r
+                  jisx0201_k_decoder, jisx0201_k_encoder }\r
+#define REGISTRY_JISX0208       { CHARSET_JISX0208, 0, 2,               \\r
+                  jisx0208_init,                                        \\r
+                  jisx0208_decoder, jisx0208_encoder }\r
+#define REGISTRY_JISX0208_O     { CHARSET_JISX0208_O, 0, 2,             \\r
+                  jisx0208_init,                                        \\r
+                  jisx0208_decoder, jisx0208_encoder }\r
+#define REGISTRY_JISX0212       { CHARSET_JISX0212, 0, 2,               \\r
+                  jisx0212_init,                                        \\r
+                  jisx0212_decoder, jisx0212_encoder }\r
+#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2,       \\r
+                  jisx0213_init,                                        \\r
+                  jisx0213_2000_1_decoder,                              \\r
+                  jisx0213_2000_1_encoder }\r
+#define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \\r
+                  jisx0213_init,                                        \\r
+                  jisx0213_2000_1_decoder,                              \\r
+                  jisx0213_2000_1_encoder_paironly }\r
+#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2,            \\r
+                  jisx0213_init,                                        \\r
+                  jisx0213_2000_2_decoder,                              \\r
+                  jisx0213_2000_2_encoder }\r
+#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2,       \\r
+                  jisx0213_init,                                        \\r
+                  jisx0213_2004_1_decoder,                              \\r
+                  jisx0213_2004_1_encoder }\r
+#define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \\r
+                  jisx0213_init,                                        \\r
+                  jisx0213_2004_1_decoder,                              \\r
+                  jisx0213_2004_1_encoder_paironly }\r
+#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2,            \\r
+                  jisx0213_init,                                        \\r
+                  jisx0213_2004_2_decoder,                              \\r
+                  jisx0213_2004_2_encoder }\r
+#define REGISTRY_GB2312         { CHARSET_GB2312, 0, 2,                 \\r
+                  gb2312_init,                                          \\r
+                  gb2312_decoder, gb2312_encoder }\r
+#define REGISTRY_CNS11643_1     { CHARSET_CNS11643_1, 1, 2,             \\r
+                  cns11643_init,                                        \\r
+                  cns11643_1_decoder, cns11643_1_encoder }\r
+#define REGISTRY_CNS11643_2     { CHARSET_CNS11643_2, 2, 2,             \\r
+                  cns11643_init,                                        \\r
+                  cns11643_2_decoder, cns11643_2_encoder }\r
+#define REGISTRY_ISO8859_1      { CHARSET_ISO8859_1, 2, 1,              \\r
+                  NULL, dummy_decoder, dummy_encoder }\r
+#define REGISTRY_ISO8859_7      { CHARSET_ISO8859_7, 2, 1,              \\r
+                  NULL, dummy_decoder, dummy_encoder }\r
+#define REGISTRY_SENTINEL       { 0, }\r
+#define CONFIGDEF(var, attrs)                                           \\r
+    static const struct iso2022_config iso2022_##var##_config = {       \\r
+        attrs, iso2022_##var##_designations                             \\r
+    };\r
+/* Per-codec designation tables.  Each array lists the REGISTRY_* entries\r
+ * for one ISO-2022 variant and ends with REGISTRY_SENTINEL; the CONFIGDEF\r
+ * line after it pairs the array with that variant's flag bits in\r
+ * iso2022_<name>_config.\r
+ * NOTE(review): entry order is presumably significant to the encoder\r
+ * (e.g. the *_PAIRONLY entry precedes REGISTRY_JISX0208) -- confirm\r
+ * before reordering. */\r
+static const struct iso2022_designation iso2022_kr_designations[] = {\r
+    REGISTRY_KSX1001_G1, REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(kr, 0)\r
+\r
+static const struct iso2022_designation iso2022_jp_designations[] = {\r
+    REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,\r
+    REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)\r
+\r
+static const struct iso2022_designation iso2022_jp_1_designations[] = {\r
+    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,\r
+    REGISTRY_JISX0208_O, REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)\r
+\r
+static const struct iso2022_designation iso2022_jp_2_designations[] = {\r
+    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,\r
+    REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,\r
+    REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)\r
+\r
+static const struct iso2022_designation iso2022_jp_2004_designations[] = {\r
+    REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,\r
+    REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)\r
+\r
+static const struct iso2022_designation iso2022_jp_3_designations[] = {\r
+    REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,\r
+    REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)\r
+\r
+static const struct iso2022_designation iso2022_jp_ext_designations[] = {\r
+    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,\r
+    REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL\r
+};\r
+CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)\r
+\r
+/* Mapping list for this module: intentionally left empty. */\r
+BEGIN_MAPPINGS_LIST\r
+  /* no mapping table here */\r
+END_MAPPINGS_LIST\r
+/* ISO2022_CODEC(variation) builds one codec-list entry: the name string\r
+ * "iso2022_<variation>", the address of iso2022_<variation>_config,\r
+ * iso2022_codec_init, and the _STATEFUL_METHODS(iso2022) expansion.  The\r
+ * expansion ends with a comma, so entries are listed without separators. */\r
+#define ISO2022_CODEC(variation) {              \\r
+    "iso2022_" #variation,                      \\r
+    &iso2022_##variation##_config,              \\r
+    iso2022_codec_init,                         \\r
+    _STATEFUL_METHODS(iso2022)                  \\r
+},\r
+/* Codec list: one entry per variant that has a CONFIGDEF in this file. */\r
+BEGIN_CODECS_LIST\r
+  ISO2022_CODEC(kr)\r
+  ISO2022_CODEC(jp)\r
+  ISO2022_CODEC(jp_1)\r
+  ISO2022_CODEC(jp_2)\r
+  ISO2022_CODEC(jp_2004)\r
+  ISO2022_CODEC(jp_3)\r
+  ISO2022_CODEC(jp_ext)\r
+END_CODECS_LIST\r
+/* NOTE(review): presumably emits the module entry point for "iso2022" --\r
+ * confirm against the macro definition in cjkcodecs.h. */\r
+I_AM_A_MODULE_FOR(iso2022)\r