AppPkg/Applications/Python/Python-2.7.10/Python/codecs.c

   1 /* ------------------------------------------------------------------------
   2
   3    Python Codec Registry and support functions
   4
   5 Written by Marc-Andre Lemburg (mal@lemburg.com).
   6
   7 Copyright (c) Corporation for National Research Initiatives.
   8
   9    ------------------------------------------------------------------------ */
  10
  11 #include "Python.h"
  12 #include <ctype.h>
  13
  14 /* --- Codec Registry ----------------------------------------------------- */
  15
  16 /* Import the standard encodings package which will register the first
  17    codec search function.
  18
  19    This is done in a lazy way so that the Unicode implementation does
  20    not downgrade startup time of scripts not needing it.
  21
  22    ImportErrors are silently ignored by this function. Only one try is
  23    made.
  24
  25 */
  26
  27 static int _PyCodecRegistry_Init(void); /* Forward */
  28
  29 int PyCodec_Register(PyObject *search_function)
  30 {
  31     PyInterpreterState *interp = PyThreadState_GET()->interp;
  32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
  33         goto onError;
  34     if (search_function == NULL) {
  35         PyErr_BadArgument();
  36         goto onError;
  37     }
  38     if (!PyCallable_Check(search_function)) {
  39         PyErr_SetString(PyExc_TypeError, "argument must be callable");
  40         goto onError;
  41     }
  42     return PyList_Append(interp->codec_search_path, search_function);
  43
  44  onError:
  45     return -1;
  46 }
  47
   48 /* Convert a string to a normalized Python string: all characters are
   49    converted to lower case, spaces are replaced with hyphens. */
  50
  51 static
  52 PyObject *normalizestring(const char *string)
  53 {
  54     register size_t i;
  55     size_t len = strlen(string);
  56     char *p;
  57     PyObject *v;
  58
  59     if (len > PY_SSIZE_T_MAX) {
  60         PyErr_SetString(PyExc_OverflowError, "string is too large");
  61         return NULL;
  62     }
  63
  64     v = PyString_FromStringAndSize(NULL, len);
  65     if (v == NULL)
  66         return NULL;
  67     p = PyString_AS_STRING(v);
  68     for (i = 0; i < len; i++) {
  69         register char ch = string[i];
  70         if (ch == ' ')
  71             ch = '-';
  72         else
  73             ch = Py_TOLOWER(Py_CHARMASK(ch));
  74         p[i] = ch;
  75     }
  76     return v;
  77 }
  78
  79 /* Lookup the given encoding and return a tuple providing the codec
  80    facilities.
  81
  82    The encoding string is looked up converted to all lower-case
  83    characters. This makes encodings looked up through this mechanism
  84    effectively case-insensitive.
  85
  86    If no codec is found, a LookupError is set and NULL returned.
  87
  88    As side effect, this tries to load the encodings package, if not
  89    yet done. This is part of the lazy load strategy for the encodings
  90    package.
  91
  92 */
  93
  94 PyObject *_PyCodec_Lookup(const char *encoding)
  95 {
  96     PyInterpreterState *interp;
  97     PyObject *result, *args = NULL, *v;
  98     Py_ssize_t i, len;
  99
 100     if (encoding == NULL) {
 101         PyErr_BadArgument();
 102         goto onError;
 103     }
 104
 105     interp = PyThreadState_GET()->interp;
 106     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 107         goto onError;
 108
 109     /* Convert the encoding to a normalized Python string: all
 110        characters are converted to lower case, spaces and hyphens are
 111        replaced with underscores. */
 112     v = normalizestring(encoding);
 113     if (v == NULL)
 114         goto onError;
 115     PyString_InternInPlace(&v);
 116
 117     /* First, try to lookup the name in the registry dictionary */
 118     result = PyDict_GetItem(interp->codec_search_cache, v);
 119     if (result != NULL) {
 120         Py_INCREF(result);
 121         Py_DECREF(v);
 122         return result;
 123     }
 124
 125     /* Next, scan the search functions in order of registration */
 126     args = PyTuple_New(1);
 127     if (args == NULL)
 128         goto onError;
 129     PyTuple_SET_ITEM(args,0,v);
 130
 131     len = PyList_Size(interp->codec_search_path);
 132     if (len < 0)
 133         goto onError;
 134     if (len == 0) {
 135         PyErr_SetString(PyExc_LookupError,
 136                         "no codec search functions registered: "
 137                         "can't find encoding");
 138         goto onError;
 139     }
 140
 141     for (i = 0; i < len; i++) {
 142         PyObject *func;
 143
 144         func = PyList_GetItem(interp->codec_search_path, i);
 145         if (func == NULL)
 146             goto onError;
 147         result = PyEval_CallObject(func, args);
 148         if (result == NULL)
 149             goto onError;
 150         if (result == Py_None) {
 151             Py_DECREF(result);
 152             continue;
 153         }
 154         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
 155             PyErr_SetString(PyExc_TypeError,
 156                             "codec search functions must return 4-tuples");
 157             Py_DECREF(result);
 158             goto onError;
 159         }
 160         break;
 161     }
 162     if (i == len) {
 163         /* XXX Perhaps we should cache misses too ? */
 164         PyErr_Format(PyExc_LookupError,
 165                      "unknown encoding: %s", encoding);
 166         goto onError;
 167     }
 168
 169     /* Cache and return the result */
 170     PyDict_SetItem(interp->codec_search_cache, v, result);
 171     Py_DECREF(args);
 172     return result;
 173
 174  onError:
 175     Py_XDECREF(args);
 176     return NULL;
 177 }
 178
 179 static
 180 PyObject *args_tuple(PyObject *object,
 181                      const char *errors)
 182 {
 183     PyObject *args;
 184
 185     args = PyTuple_New(1 + (errors != NULL));
 186     if (args == NULL)
 187         return NULL;
 188     Py_INCREF(object);
 189     PyTuple_SET_ITEM(args,0,object);
 190     if (errors) {
 191         PyObject *v;
 192
 193         v = PyString_FromString(errors);
 194         if (v == NULL) {
 195             Py_DECREF(args);
 196             return NULL;
 197         }
 198         PyTuple_SET_ITEM(args, 1, v);
 199     }
 200     return args;
 201 }
 202
 203 /* Helper function to get a codec item */
 204
 205 static
 206 PyObject *codec_getitem(const char *encoding, int index)
 207 {
 208     PyObject *codecs;
 209     PyObject *v;
 210
 211     codecs = _PyCodec_Lookup(encoding);
 212     if (codecs == NULL)
 213         return NULL;
 214     v = PyTuple_GET_ITEM(codecs, index);
 215     Py_DECREF(codecs);
 216     Py_INCREF(v);
 217     return v;
 218 }
 219
 220 /* Helper function to create an incremental codec. */
 221
 222 static
 223 PyObject *codec_getincrementalcodec(const char *encoding,
 224                                     const char *errors,
 225                                     const char *attrname)
 226 {
 227     PyObject *codecs, *ret, *inccodec;
 228
 229     codecs = _PyCodec_Lookup(encoding);
 230     if (codecs == NULL)
 231         return NULL;
 232     inccodec = PyObject_GetAttrString(codecs, attrname);
 233     Py_DECREF(codecs);
 234     if (inccodec == NULL)
 235         return NULL;
 236     if (errors)
 237         ret = PyObject_CallFunction(inccodec, "s", errors);
 238     else
 239         ret = PyObject_CallFunction(inccodec, NULL);
 240     Py_DECREF(inccodec);
 241     return ret;
 242 }
 243
 244 /* Helper function to create a stream codec. */
 245
 246 static
 247 PyObject *codec_getstreamcodec(const char *encoding,
 248                                PyObject *stream,
 249                                const char *errors,
 250                                const int index)
 251 {
 252     PyObject *codecs, *streamcodec, *codeccls;
 253
 254     codecs = _PyCodec_Lookup(encoding);
 255     if (codecs == NULL)
 256         return NULL;
 257
 258     codeccls = PyTuple_GET_ITEM(codecs, index);
 259     if (errors != NULL)
 260         streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
 261     else
 262         streamcodec = PyObject_CallFunction(codeccls, "O", stream);
 263     Py_DECREF(codecs);
 264     return streamcodec;
 265 }
 266
 267 /* Convenience APIs to query the Codec registry.
 268
 269    All APIs return a codec object with incremented refcount.
 270
 271  */
 272
 273 PyObject *PyCodec_Encoder(const char *encoding)
 274 {
 275     return codec_getitem(encoding, 0);
 276 }
 277
 278 PyObject *PyCodec_Decoder(const char *encoding)
 279 {
 280     return codec_getitem(encoding, 1);
 281 }
 282
 283 PyObject *PyCodec_IncrementalEncoder(const char *encoding,
 284                                      const char *errors)
 285 {
 286     return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
 287 }
 288
 289 PyObject *PyCodec_IncrementalDecoder(const char *encoding,
 290                                      const char *errors)
 291 {
 292     return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
 293 }
 294
 295 PyObject *PyCodec_StreamReader(const char *encoding,
 296                                PyObject *stream,
 297                                const char *errors)
 298 {
 299     return codec_getstreamcodec(encoding, stream, errors, 2);
 300 }
 301
 302 PyObject *PyCodec_StreamWriter(const char *encoding,
 303                                PyObject *stream,
 304                                const char *errors)
 305 {
 306     return codec_getstreamcodec(encoding, stream, errors, 3);
 307 }
 308
  309 /* Encode an object (e.g. a Unicode object) using the given encoding
 310    and return the resulting encoded object (usually a Python string).
 311
 312    errors is passed to the encoder factory as argument if non-NULL. */
 313
 314 PyObject *PyCodec_Encode(PyObject *object,
 315                          const char *encoding,
 316                          const char *errors)
 317 {
 318     PyObject *encoder = NULL;
 319     PyObject *args = NULL, *result = NULL;
 320     PyObject *v;
 321
 322     encoder = PyCodec_Encoder(encoding);
 323     if (encoder == NULL)
 324         goto onError;
 325
 326     args = args_tuple(object, errors);
 327     if (args == NULL)
 328         goto onError;
 329
 330     result = PyEval_CallObject(encoder,args);
 331     if (result == NULL)
 332         goto onError;
 333
 334     if (!PyTuple_Check(result) ||
 335         PyTuple_GET_SIZE(result) != 2) {
 336         PyErr_SetString(PyExc_TypeError,
 337                         "encoder must return a tuple (object,integer)");
 338         goto onError;
 339     }
 340     v = PyTuple_GET_ITEM(result,0);
 341     Py_INCREF(v);
 342     /* We don't check or use the second (integer) entry. */
 343
 344     Py_DECREF(args);
 345     Py_DECREF(encoder);
 346     Py_DECREF(result);
 347     return v;
 348
 349  onError:
 350     Py_XDECREF(result);
 351     Py_XDECREF(args);
 352     Py_XDECREF(encoder);
 353     return NULL;
 354 }
 355
 356 /* Decode an object (usually a Python string) using the given encoding
  357    and return an equivalent object (e.g. a Unicode object).
 358
 359    errors is passed to the decoder factory as argument if non-NULL. */
 360
 361 PyObject *PyCodec_Decode(PyObject *object,
 362                          const char *encoding,
 363                          const char *errors)
 364 {
 365     PyObject *decoder = NULL;
 366     PyObject *args = NULL, *result = NULL;
 367     PyObject *v;
 368
 369     decoder = PyCodec_Decoder(encoding);
 370     if (decoder == NULL)
 371         goto onError;
 372
 373     args = args_tuple(object, errors);
 374     if (args == NULL)
 375         goto onError;
 376
 377     result = PyEval_CallObject(decoder,args);
 378     if (result == NULL)
 379         goto onError;
 380     if (!PyTuple_Check(result) ||
 381         PyTuple_GET_SIZE(result) != 2) {
 382         PyErr_SetString(PyExc_TypeError,
 383                         "decoder must return a tuple (object,integer)");
 384         goto onError;
 385     }
 386     v = PyTuple_GET_ITEM(result,0);
 387     Py_INCREF(v);
 388     /* We don't check or use the second (integer) entry. */
 389
 390     Py_DECREF(args);
 391     Py_DECREF(decoder);
 392     Py_DECREF(result);
 393     return v;
 394
 395  onError:
 396     Py_XDECREF(args);
 397     Py_XDECREF(decoder);
 398     Py_XDECREF(result);
 399     return NULL;
 400 }
 401
  402 /* Register the error handling callback function error under the name
  403    name. This function will be called by the codec when it encounters
  404    unencodable characters/undecodable bytes and doesn't know the
  405    callback name, when name is specified as the error parameter
  406    in the call to the encode/decode function.
  407    Return 0 on success, -1 on error */
 408 int PyCodec_RegisterError(const char *name, PyObject *error)
 409 {
 410     PyInterpreterState *interp = PyThreadState_GET()->interp;
 411     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 412         return -1;
 413     if (!PyCallable_Check(error)) {
 414         PyErr_SetString(PyExc_TypeError, "handler must be callable");
 415         return -1;
 416     }
 417     return PyDict_SetItemString(interp->codec_error_registry,
 418                                 (char *)name, error);
 419 }
 420
 421 /* Lookup the error handling callback function registered under the
 422    name error. As a special case NULL can be passed, in which case
 423    the error handling callback for strict encoding will be returned. */
 424 PyObject *PyCodec_LookupError(const char *name)
 425 {
 426     PyObject *handler = NULL;
 427
 428     PyInterpreterState *interp = PyThreadState_GET()->interp;
 429     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 430         return NULL;
 431
 432     if (name==NULL)
 433         name = "strict";
 434     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
 435     if (!handler)
 436         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
 437     else
 438         Py_INCREF(handler);
 439     return handler;
 440 }
 441
 442 static void wrong_exception_type(PyObject *exc)
 443 {
 444     PyObject *type = PyObject_GetAttrString(exc, "__class__");
 445     if (type != NULL) {
 446         PyObject *name = PyObject_GetAttrString(type, "__name__");
 447         Py_DECREF(type);
 448         if (name != NULL) {
 449             PyObject *string = PyObject_Str(name);
 450             Py_DECREF(name);
 451             if (string != NULL) {
 452                 PyErr_Format(PyExc_TypeError,
 453                     "don't know how to handle %.400s in error callback",
 454                     PyString_AS_STRING(string));
 455                 Py_DECREF(string);
 456             }
 457         }
 458     }
 459 }
 460
 461 PyObject *PyCodec_StrictErrors(PyObject *exc)
 462 {
 463     if (PyExceptionInstance_Check(exc))
 464         PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
 465     else
 466         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
 467     return NULL;
 468 }
 469
 470
 471 #ifdef Py_USING_UNICODE
 472 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 473 {
 474     Py_ssize_t end;
 475     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 476         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 477             return NULL;
 478     }
 479     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 480         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 481             return NULL;
 482     }
 483     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 484         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 485             return NULL;
 486     }
 487     else {
 488         wrong_exception_type(exc);
 489         return NULL;
 490     }
 491     /* ouch: passing NULL, 0, pos gives None instead of u'' */
 492     return Py_BuildValue("(u#n)", &end, 0, end);
 493 }
 494
 495
 496 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 497 {
 498     PyObject *restuple;
 499     Py_ssize_t start;
 500     Py_ssize_t end;
 501     Py_ssize_t i;
 502
 503     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 504         PyObject *res;
 505         Py_UNICODE *p;
 506         if (PyUnicodeEncodeError_GetStart(exc, &start))
 507             return NULL;
 508         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 509             return NULL;
 510         res = PyUnicode_FromUnicode(NULL, end-start);
 511         if (res == NULL)
 512             return NULL;
 513         for (p = PyUnicode_AS_UNICODE(res), i = start;
 514             i<end; ++p, ++i)
 515             *p = '?';
 516         restuple = Py_BuildValue("(On)", res, end);
 517         Py_DECREF(res);
 518         return restuple;
 519     }
 520     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 521         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
 522         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 523             return NULL;
 524         return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
 525     }
 526     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 527         PyObject *res;
 528         Py_UNICODE *p;
 529         if (PyUnicodeTranslateError_GetStart(exc, &start))
 530             return NULL;
 531         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 532             return NULL;
 533         res = PyUnicode_FromUnicode(NULL, end-start);
 534         if (res == NULL)
 535             return NULL;
 536         for (p = PyUnicode_AS_UNICODE(res), i = start;
 537             i<end; ++p, ++i)
 538             *p = Py_UNICODE_REPLACEMENT_CHARACTER;
 539         restuple = Py_BuildValue("(On)", res, end);
 540         Py_DECREF(res);
 541         return restuple;
 542     }
 543     else {
 544         wrong_exception_type(exc);
 545         return NULL;
 546     }
 547 }
 548
 549 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
 550 {
 551     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 552         PyObject *restuple;
 553         PyObject *object;
 554         Py_ssize_t start;
 555         Py_ssize_t end;
 556         PyObject *res;
 557         Py_UNICODE *p;
 558         Py_UNICODE *startp;
 559         Py_UNICODE *e;
 560         Py_UNICODE *outp;
 561         Py_ssize_t ressize;
 562         if (PyUnicodeEncodeError_GetStart(exc, &start))
 563             return NULL;
 564         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 565             return NULL;
 566         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 567             return NULL;
 568         startp = PyUnicode_AS_UNICODE(object);
 569         if (end - start > PY_SSIZE_T_MAX / (2+7+1)) {
 570             end = start + PY_SSIZE_T_MAX / (2+7+1);
 571 #ifndef Py_UNICODE_WIDE
 572             if (0xD800 <= startp[end - 1] && startp[end - 1] <= 0xDBFF)
 573                 end--;
 574 #endif
 575         }
 576         e = startp + end;
 577         for (p = startp+start, ressize = 0; p < e;) {
 578             Py_UCS4 ch = *p++;
 579 #ifndef Py_UNICODE_WIDE
 580             if ((0xD800 <= ch && ch <= 0xDBFF) &&
 581                 (p < e) &&
 582                 (0xDC00 <= *p && *p <= 0xDFFF)) {
 583                 ch = ((((ch & 0x03FF) << 10) |
 584                        ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
 585             }
 586 #endif
 587             if (ch < 10)
 588                 ressize += 2+1+1;
 589             else if (ch < 100)
 590                 ressize += 2+2+1;
 591             else if (ch < 1000)
 592                 ressize += 2+3+1;
 593             else if (ch < 10000)
 594                 ressize += 2+4+1;
 595             else if (ch < 100000)
 596                 ressize += 2+5+1;
 597             else if (ch < 1000000)
 598                 ressize += 2+6+1;
 599             else
 600                 ressize += 2+7+1;
 601         }
 602         /* allocate replacement */
 603         res = PyUnicode_FromUnicode(NULL, ressize);
 604         if (res == NULL) {
 605             Py_DECREF(object);
 606             return NULL;
 607         }
 608         /* generate replacement */
 609         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
 610             int digits;
 611             int base;
 612             Py_UCS4 ch = *p++;
 613 #ifndef Py_UNICODE_WIDE
 614             if ((0xD800 <= ch && ch <= 0xDBFF) &&
 615                 (p < startp+end) &&
 616                 (0xDC00 <= *p && *p <= 0xDFFF)) {
 617                 ch = ((((ch & 0x03FF) << 10) |
 618                        ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
 619             }
 620 #endif
 621             *outp++ = '&';
 622             *outp++ = '#';
 623             if (ch < 10) {
 624                 digits = 1;
 625                 base = 1;
 626             }
 627             else if (ch < 100) {
 628                 digits = 2;
 629                 base = 10;
 630             }
 631             else if (ch < 1000) {
 632                 digits = 3;
 633                 base = 100;
 634             }
 635             else if (ch < 10000) {
 636                 digits = 4;
 637                 base = 1000;
 638             }
 639             else if (ch < 100000) {
 640                 digits = 5;
 641                 base = 10000;
 642             }
 643             else if (ch < 1000000) {
 644                 digits = 6;
 645                 base = 100000;
 646             }
 647             else {
 648                 digits = 7;
 649                 base = 1000000;
 650             }
 651             while (digits-->0) {
 652                 *outp++ = '0' + ch/base;
 653                 ch %= base;
 654                 base /= 10;
 655             }
 656             *outp++ = ';';
 657         }
 658         restuple = Py_BuildValue("(On)", res, end);
 659         Py_DECREF(res);
 660         Py_DECREF(object);
 661         return restuple;
 662     }
 663     else {
 664         wrong_exception_type(exc);
 665         return NULL;
 666     }
 667 }
 668
 669 static Py_UNICODE hexdigits[] = {
 670     '0', '1', '2', '3', '4', '5', '6', '7',
 671     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
 672 };
 673
 674 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 675 {
 676     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 677         PyObject *restuple;
 678         PyObject *object;
 679         Py_ssize_t start;
 680         Py_ssize_t end;
 681         PyObject *res;
 682         Py_UNICODE *p;
 683         Py_UNICODE *startp;
 684         Py_UNICODE *outp;
 685         Py_ssize_t ressize;
 686         if (PyUnicodeEncodeError_GetStart(exc, &start))
 687             return NULL;
 688         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 689             return NULL;
 690         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 691             return NULL;
 692         if (end - start > PY_SSIZE_T_MAX / (1+1+8))
 693             end = start + PY_SSIZE_T_MAX / (1+1+8);
 694         startp = PyUnicode_AS_UNICODE(object);
 695         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 696 #ifdef Py_UNICODE_WIDE
 697             if (*p >= 0x00010000)
 698                 ressize += 1+1+8;
 699             else
 700 #endif
 701             if (*p >= 0x100) {
 702                 ressize += 1+1+4;
 703             }
 704             else
 705                 ressize += 1+1+2;
 706         }
 707         res = PyUnicode_FromUnicode(NULL, ressize);
 708         if (res == NULL) {
 709             Py_DECREF(object);
 710             return NULL;
 711         }
 712         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 713             p < startp+end; ++p) {
 714             Py_UNICODE c = *p;
 715             *outp++ = '\\';
 716 #ifdef Py_UNICODE_WIDE
 717             if (c >= 0x00010000) {
 718                 *outp++ = 'U';
 719                 *outp++ = hexdigits[(c>>28)&0xf];
 720                 *outp++ = hexdigits[(c>>24)&0xf];
 721                 *outp++ = hexdigits[(c>>20)&0xf];
 722                 *outp++ = hexdigits[(c>>16)&0xf];
 723                 *outp++ = hexdigits[(c>>12)&0xf];
 724                 *outp++ = hexdigits[(c>>8)&0xf];
 725             }
 726             else
 727 #endif
 728             if (c >= 0x100) {
 729                 *outp++ = 'u';
 730                 *outp++ = hexdigits[(c>>12)&0xf];
 731                 *outp++ = hexdigits[(c>>8)&0xf];
 732             }
 733             else
 734                 *outp++ = 'x';
 735             *outp++ = hexdigits[(c>>4)&0xf];
 736             *outp++ = hexdigits[c&0xf];
 737         }
 738
 739         restuple = Py_BuildValue("(On)", res, end);
 740         Py_DECREF(res);
 741         Py_DECREF(object);
 742         return restuple;
 743     }
 744     else {
 745         wrong_exception_type(exc);
 746         return NULL;
 747     }
 748 }
 749 #endif
 750
 751 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 752 {
 753     return PyCodec_StrictErrors(exc);
 754 }
 755
 756
 757 #ifdef Py_USING_UNICODE
 758 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
 759 {
 760     return PyCodec_IgnoreErrors(exc);
 761 }
 762
 763
 764 static PyObject *replace_errors(PyObject *self, PyObject *exc)
 765 {
 766     return PyCodec_ReplaceErrors(exc);
 767 }
 768
 769
 770 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
 771 {
 772     return PyCodec_XMLCharRefReplaceErrors(exc);
 773 }
 774
 775
 776 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
 777 {
 778     return PyCodec_BackslashReplaceErrors(exc);
 779 }
 780 #endif
 781
 782 static int _PyCodecRegistry_Init(void)
 783 {
 784     static struct {
 785         char *name;
 786         PyMethodDef def;
 787     } methods[] =
 788     {
 789         {
 790             "strict",
 791             {
 792                 "strict_errors",
 793                 strict_errors,
 794                 METH_O,
 795                 PyDoc_STR("Implements the 'strict' error handling, which "
 796                           "raises a UnicodeError on coding errors.")
 797             }
 798         },
 799 #ifdef Py_USING_UNICODE
 800         {
 801             "ignore",
 802             {
 803                 "ignore_errors",
 804                 ignore_errors,
 805                 METH_O,
 806                 PyDoc_STR("Implements the 'ignore' error handling, which "
 807                           "ignores malformed data and continues.")
 808             }
 809         },
 810         {
 811             "replace",
 812             {
 813                 "replace_errors",
 814                 replace_errors,
 815                 METH_O,
 816                 PyDoc_STR("Implements the 'replace' error handling, which "
 817                           "replaces malformed data with a replacement marker.")
 818             }
 819         },
 820         {
 821             "xmlcharrefreplace",
 822             {
 823                 "xmlcharrefreplace_errors",
 824                 xmlcharrefreplace_errors,
 825                 METH_O,
 826                 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
 827                           "which replaces an unencodable character with the "
 828                           "appropriate XML character reference.")
 829             }
 830         },
 831         {
 832             "backslashreplace",
 833             {
 834                 "backslashreplace_errors",
 835                 backslashreplace_errors,
 836                 METH_O,
 837                 PyDoc_STR("Implements the 'backslashreplace' error handling, "
 838                           "which replaces an unencodable character with a "
 839                           "backslashed escape sequence.")
 840             }
 841         }
 842 #endif
 843     };
 844
 845     PyInterpreterState *interp = PyThreadState_GET()->interp;
 846     PyObject *mod;
 847     unsigned i;
 848
 849     if (interp->codec_search_path != NULL)
 850         return 0;
 851
 852     interp->codec_search_path = PyList_New(0);
 853     interp->codec_search_cache = PyDict_New();
 854     interp->codec_error_registry = PyDict_New();
 855
 856     if (interp->codec_error_registry) {
 857         for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
 858             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
 859             int res;
 860             if (!func)
 861                 Py_FatalError("can't initialize codec error registry");
 862             res = PyCodec_RegisterError(methods[i].name, func);
 863             Py_DECREF(func);
 864             if (res)
 865                 Py_FatalError("can't initialize codec error registry");
 866         }
 867     }
 868
 869     if (interp->codec_search_path == NULL ||
 870         interp->codec_search_cache == NULL ||
 871         interp->codec_error_registry == NULL)
 872         Py_FatalError("can't initialize codec registry");
 873
 874     mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
 875     if (mod == NULL) {
 876         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 877             /* Ignore ImportErrors... this is done so that
 878                distributions can disable the encodings package. Note
 879                that other errors are not masked, e.g. SystemErrors
 880                raised to inform the user of an error in the Python
 881                configuration are still reported back to the user. */
 882             PyErr_Clear();
 883             return 0;
 884         }
 885         return -1;
 886     }
 887     Py_DECREF(mod);
 888     return 0;
 889 }