+++ /dev/null
-""" Standard "encodings" Package\r
-\r
- Standard Python encoding modules are stored in this package\r
- directory.\r
-\r
- Codec modules must have names corresponding to normalized encoding\r
- names as defined in the normalize_encoding() function below, e.g.\r
- 'utf-8' must be implemented by the module 'utf_8.py'.\r
-\r
- Each codec module must export the following interface:\r
-\r
- * getregentry() -> codecs.CodecInfo object\r
- The getregentry() API must return a CodecInfo object with encoder, decoder,\r
- incrementalencoder, incrementaldecoder, streamwriter and streamreader\r
- attributes which adhere to the Python Codec Interface Standard.\r
-\r
- In addition, a module may optionally also define the following\r
- APIs which are then used by the package's codec search function:\r
-\r
- * getaliases() -> sequence of encoding name strings to use as aliases\r
-\r
- Alias names returned by getaliases() must be normalized encoding\r
- names as defined by normalize_encoding().\r
-\r
-Written by Marc-Andre Lemburg (mal@lemburg.com).\r
-\r
-(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.\r
-\r
-"""#"\r
-\r
-import codecs\r
-from encodings import aliases\r
-import __builtin__\r
-\r
# Results of previous search_function() calls, keyed by the encoding name
# exactly as it was requested.  A value of None records a failed lookup.
_cache = {}

# Sentinel used as the .get() default so that a cached None (known miss)
# can be told apart from "never looked up".
_unknown = '--unknown--'

# fromlist argument handed to __import__() by search_function().
_import_tail = ['*']

# 256-entry translation table used by normalize_encoding(): ASCII letters,
# digits and the dot map to themselves, every other character maps to a
# space.  The table is generated instead of being spelled out as a literal
# so that its length cannot silently drift away from the 256 entries that
# an 8-bit str.translate() table requires.
_norm_encoding_map = ''.join(
    chr(code) if chr(code) in
    '.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    else ' '
    for code in range(256))

# Alias table shared with encodings.aliases; search_function() adds to it.
_aliases = aliases.aliases
-\r
class CodecRegistryError(LookupError, SystemError):
    """Raised by search_function() when a codec module's getregentry()
    result cannot be turned into a valid registry entry.

    Derives from both LookupError and SystemError, so handlers for either
    base class catch it.
    """
-\r
def normalize_encoding(encoding):
    """Return the normalized form of an encoding name.

    Every run of characters other than ASCII letters, digits and the dot
    (kept because it is used in Python package names) is collapsed into a
    single underscore, e.g. ' -;#' becomes '_'.  Leading and trailing
    underscores are dropped.

    Encoding names should be ASCII only; if they do contain non-ASCII
    characters, these must be Latin-1 compatible.
    """
    # .translate() behaves differently for Unicode strings, so reduce the
    # name to an 8-bit string first.  The unicode type is looked up
    # defensively because it may be absent from __builtin__.
    unicode_type = getattr(__builtin__, "unicode", None)
    if unicode_type is not None and isinstance(encoding, unicode_type):
        # .encode('latin-1') does *not* go through the codec registry, so
        # this call cannot recurse back into the search function.  (See
        # PyUnicode_AsEncodedString() in unicodeobject.c for details.)
        encoding = encoding.encode('latin-1')

    # Unwanted characters become spaces; split() then discards empty runs
    # at both ends and collapses interior runs to one separator.
    words = encoding.translate(_norm_encoding_map).split()
    return '_'.join(words)
-\r
def search_function(encoding):
    """Codec search function for the 'encodings' package.

    The result for each requested name is cached in _cache (including
    misses).  On a cache miss the name is normalized with
    normalize_encoding(), resolved through the alias table, and the module
    'encodings.<name>' is imported.  The module's getregentry() result is
    converted to a codecs.CodecInfo if it is a legacy tuple, cached, and
    returned.  Aliases announced by the module's optional getaliases() are
    added to the alias table without overwriting existing entries.

    Returns the codecs.CodecInfo entry, or None if no codec module could
    be found for the name.

    Raises CodecRegistryError if getregentry() returns a non-CodecInfo
    value that does not have between 4 and 7 items, or whose codec slots
    are not callable (or None for the optional ones).
    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding name and look
    # up the module using the aliased name, then fall back to the
    # normalized name itself.
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = (_aliases.get(norm_encoding) or
                        _aliases.get(norm_encoding.replace('.', '_')))
    if aliased_encoding is not None:
        modnames = [aliased_encoding, norm_encoding]
    else:
        modnames = [norm_encoding]

    mod = None
    for modname in modnames:
        # Empty names and names containing a dot are never imported.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import
            # of a module with side-effects that is not in the 'encodings'
            # package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            pass
        else:
            break

    try:
        # When no module was imported, mod is None and this attribute
        # access fails as well, which routes both cases to the miss path.
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy form: a sequence of 4 to 7 items in CodecInfo argument
        # order (two required callables, four optional ones, then a name).
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError(
                'module "%s" (%s) failed to register'
                % (mod.__name__, mod.__file__))
        required = entry[:2]
        optional = entry[2:6]
        if (not all(hasattr(func, '__call__') for func in required) or
                any(func is not None and not hasattr(func, '__call__')
                    for func in optional)):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)'
                % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and fill in the name from
            # the module name.  Only the first six items are kept so that
            # a 7-item entry whose name slot is None gets the name
            # substituted instead of appended as an eighth item.
            entry = (tuple(entry[:6]) + (None,) * (6 - len(entry)) +
                     (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                # modname still holds the name under which the module was
                # successfully imported above.
                _aliases[alias] = modname

    # Return the registry entry
    return entry
-\r
-# Register the search_function in the Python codec registry.  This is a\r
-# module-level statement, so it runs when this module is imported.\r
-codecs.register(search_function)\r