--- /dev/null
+""" Standard "encodings" Package\r
+\r
+ Standard Python encoding modules are stored in this package\r
+ directory.\r
+\r
+ Codec modules must have names corresponding to normalized encoding\r
+ names as defined in the normalize_encoding() function below, e.g.\r
+ 'utf-8' must be implemented by the module 'utf_8.py'.\r
+\r
+ Each codec module must export the following interface:\r
+\r
+ * getregentry() -> codecs.CodecInfo object\r
+ The getregentry() API must return a CodecInfo object with encoder, decoder,\r
+ incrementalencoder, incrementaldecoder, streamwriter and streamreader\r
+ attributes which adhere to the Python Codec Interface Standard.\r
+\r
+ In addition, a module may optionally also define the following\r
+ APIs which are then used by the package's codec search function:\r
+\r
+ * getaliases() -> sequence of encoding name strings to use as aliases\r
+\r
+ Alias names returned by getaliases() must be normalized encoding\r
+ names as defined by normalize_encoding().\r
+\r
+Written by Marc-Andre Lemburg (mal@lemburg.com).\r
+\r
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.\r
+\r
+"""#"\r
+\r
+import codecs\r
+from encodings import aliases\r
+import __builtin__\r
+\r
+_cache = {}\r
+_unknown = '--unknown--'\r
+_import_tail = ['*']\r
+_norm_encoding_map = (' . '\r
+ '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '\r
+ ' abcdefghijklmnopqrstuvwxyz '\r
+ ' '\r
+ ' '\r
+ ' ')\r
+_aliases = aliases.aliases\r
+\r
+class CodecRegistryError(LookupError, SystemError):\r
+ pass\r
+\r
+def normalize_encoding(encoding):\r
+\r
+ """ Normalize an encoding name.\r
+\r
+ Normalization works as follows: all non-alphanumeric\r
+ characters except the dot used for Python package names are\r
+ collapsed and replaced with a single underscore, e.g. ' -;#'\r
+ becomes '_'. Leading and trailing underscores are removed.\r
+\r
+ Note that encoding names should be ASCII only; if they do use\r
+ non-ASCII characters, these must be Latin-1 compatible.\r
+\r
+ """\r
+ # Make sure we have an 8-bit string, because .translate() works\r
+ # differently for Unicode strings.\r
+ if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode):\r
+ # Note that .encode('latin-1') does *not* use the codec\r
+ # registry, so this call doesn't recurse. (See unicodeobject.c\r
+ # PyUnicode_AsEncodedString() for details)\r
+ encoding = encoding.encode('latin-1')\r
+ return '_'.join(encoding.translate(_norm_encoding_map).split())\r
+\r
+def search_function(encoding):\r
+\r
+ # Cache lookup\r
+ entry = _cache.get(encoding, _unknown)\r
+ if entry is not _unknown:\r
+ return entry\r
+\r
+ # Import the module:\r
+ #\r
+ # First try to find an alias for the normalized encoding\r
+ # name and lookup the module using the aliased name, then try to\r
+ # lookup the module using the standard import scheme, i.e. first\r
+ # try in the encodings package, then at top-level.\r
+ #\r
+ norm_encoding = normalize_encoding(encoding)\r
+ aliased_encoding = _aliases.get(norm_encoding) or \\r
+ _aliases.get(norm_encoding.replace('.', '_'))\r
+ if aliased_encoding is not None:\r
+ modnames = [aliased_encoding,\r
+ norm_encoding]\r
+ else:\r
+ modnames = [norm_encoding]\r
+ for modname in modnames:\r
+ if not modname or '.' in modname:\r
+ continue\r
+ try:\r
+ # Import is absolute to prevent the possibly malicious import of a\r
+ # module with side-effects that is not in the 'encodings' package.\r
+ mod = __import__('encodings.' + modname, fromlist=_import_tail,\r
+ level=0)\r
+ except ImportError:\r
+ pass\r
+ else:\r
+ break\r
+ else:\r
+ mod = None\r
+\r
+ try:\r
+ getregentry = mod.getregentry\r
+ except AttributeError:\r
+ # Not a codec module\r
+ mod = None\r
+\r
+ if mod is None:\r
+ # Cache misses\r
+ _cache[encoding] = None\r
+ return None\r
+\r
+ # Now ask the module for the registry entry\r
+ entry = getregentry()\r
+ if not isinstance(entry, codecs.CodecInfo):\r
+ if not 4 <= len(entry) <= 7:\r
+ raise CodecRegistryError,\\r
+ 'module "%s" (%s) failed to register' % \\r
+ (mod.__name__, mod.__file__)\r
+ if not hasattr(entry[0], '__call__') or \\r
+ not hasattr(entry[1], '__call__') or \\r
+ (entry[2] is not None and not hasattr(entry[2], '__call__')) or \\r
+ (entry[3] is not None and not hasattr(entry[3], '__call__')) or \\r
+ (len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \\r
+ (len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')):\r
+ raise CodecRegistryError,\\r
+ 'incompatible codecs in module "%s" (%s)' % \\r
+ (mod.__name__, mod.__file__)\r
+ if len(entry)<7 or entry[6] is None:\r
+ entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)\r
+ entry = codecs.CodecInfo(*entry)\r
+\r
+ # Cache the codec registry entry\r
+ _cache[encoding] = entry\r
+\r
+ # Register its aliases (without overwriting previously registered\r
+ # aliases)\r
+ try:\r
+ codecaliases = mod.getaliases()\r
+ except AttributeError:\r
+ pass\r
+ else:\r
+ for alias in codecaliases:\r
+ if alias not in _aliases:\r
+ _aliases[alias] = modname\r
+\r
+ # Return the registry entry\r
+ return entry\r
+\r
+# Register search_function in the Python codec registry.  This runs as a\r
+# side effect of importing this module.\r
+codecs.register(search_function)\r