+++ /dev/null
-"""Internationalization and localization support.\r
-\r
-This module provides internationalization (I18N) and localization (L10N)\r
-support for your Python programs by providing an interface to the GNU gettext\r
-message catalog library.\r
-\r
-I18N refers to the operation by which a program is made aware of multiple\r
-languages. L10N refers to the adaptation of your program, once\r
-internationalized, to the local language and cultural habits.\r
-\r
-"""\r
-\r
-# This module represents the integration of work, contributions, feedback, and\r
-# suggestions from the following people:\r
-#\r
-# Martin von Loewis, who wrote the initial implementation of the underlying\r
-# C-based libintlmodule (later renamed _gettext), along with a skeletal\r
-# gettext.py implementation.\r
-#\r
-# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,\r
-# which also included a pure-Python implementation to read .mo files if\r
-# intlmodule wasn't available.\r
-#\r
-# James Henstridge, who also wrote a gettext.py module, which has some\r
-# interesting, but currently unsupported experimental features: the notion of\r
-# a Catalog class and instances, and the ability to add to a catalog file via\r
-# a Python API.\r
-#\r
-# Barry Warsaw integrated these modules, wrote the .install() API and code,\r
-# and conformed all C and Python code to Python's coding standards.\r
-#\r
-# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this\r
-# module.\r
-#\r
-# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.\r
-#\r
-# TODO:\r
-# - Lazy loading of .mo files. Currently the entire catalog is loaded into\r
-# memory, but that's probably bad for large translated programs. Instead,\r
-# the lexical sort of original strings in GNU .mo files should be exploited\r
-# to do binary searches and lazy initializations. Or you might want to use\r
-# the undocumented double-hash algorithm for .mo files with hash tables, but\r
-# you'll need to study the GNU gettext code to do this.\r
-#\r
-# - Support Solaris .mo file formats. Unfortunately, we've been unable to\r
-# find this format documented anywhere.\r
-\r
-\r
-import locale, copy, os, re, struct, sys\r
-from errno import ENOENT\r
-\r
-\r
# Public names exported by a star-import of this module.
__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
           'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
           'bind_textdomain_codeset',
           'dgettext', 'dngettext', 'gettext', 'lgettext', 'ldgettext',
           'ldngettext', 'lngettext', 'ngettext',
           ]

# Directory searched for catalogs when no locale directory is given:
# <sys.prefix>/share/locale.
_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
-\r
-\r
-def test(condition, true, false):\r
- """\r
- Implements the C expression:\r
-\r
- condition ? true : false\r
-\r
- Required to correctly interpret plural forms.\r
- """\r
- if condition:\r
- return true\r
- else:\r
- return false\r
-\r
-\r
def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python lambda function that implements an equivalent expression.

    ``plural`` is the text of the C expression, e.g. ``n != 1``.  The
    returned callable takes the count ``n`` and returns the value of the
    expression converted with int().

    Raises ValueError when the expression cannot be tokenized, when it uses
    an identifier other than ``n``, when it contains a string literal, or
    when it has a stray closing parenthesis.
    """
    # The Python 2 StringIO modules are preferred; io.StringIO is only a
    # last resort for interpreters that do not ship them.
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
    import token, tokenize

    # Security check: allow only the "n" identifier, and no string literals.
    # The rewriting below works character by character and knows nothing
    # about quoting, so text hidden inside a string literal (for example
    # '"(eval(foo) && ""') could be moved outside the quotes and then
    # executed by eval().
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    try:
        danger = [x for x in tokens
                  if (x[0] == token.NAME and x[1] != 'n')
                  or x[0] == token.STRING]
    except tokenize.TokenError:
        raise ValueError(
            'plural forms expression error, maybe unbalanced parenthesis')
    else:
        if danger:
            raise ValueError('plural forms expression could be dangerous')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')

    # "!x" becomes " not x"; "!=" is left alone.
    expr = re.compile(r'\!([^=])')
    plural = expr.sub(' not \\1', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".
    expr = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        # The "else" part may itself be a chained conditional, so it is
        # rewritten recursively.
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     expr.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses:
    # each '(' opens a new text buffer, each ')' rewrites the buffer it
    # closes and appends it, parenthesized, to the enclosing buffer.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            if len(stack) == 1:
                # A ')' without a matching '('.  An unclosed '(' is
                # normally reported by the tokenizer in the check above.
                raise ValueError('unbalanced parenthesis in plural form')
            s = expr.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    plural = expr.sub(repl, stack.pop())

    # NOTE(security): this evaluates text that comes from the catalog.  The
    # token check above is the only guard; an expression made solely of
    # "n", numbers and operators can still be expensive to evaluate.
    return eval('lambda n: int(%s)' % plural)
-\r
-\r
-\r
def _expand_lang(locale):
    """Return the candidate names for a locale, most specific first.

    The name is normalized with locale.normalize() and split into a
    language plus optional ``_territory``, ``.codeset`` and ``@modifier``
    parts.  The result lists the language combined with every subset of
    the parts that are present, ending with the bare language.
    """
    # The parameter shadows the locale module, hence the local import.
    from locale import normalize
    remainder = normalize(locale)
    CODESET = 1 << 0
    TERRITORY = 1 << 1
    MODIFIER = 1 << 2
    # Peel the components off from the right: modifier, codeset, territory.
    # Each stored part keeps its leading separator.
    parts = {}
    mask = 0
    for separator, flag in (('@', MODIFIER), ('.', CODESET), ('_', TERRITORY)):
        remainder, found, tail = remainder.partition(separator)
        parts[flag] = found + tail
        if found:
            mask |= flag
    language = remainder
    candidates = []
    # Walk the bit combinations from the fullest one down to zero, keeping
    # only those built from components that actually exist.
    for combo in range(mask, -1, -1):
        if combo & ~mask:
            continue
        name = language
        for flag in (TERRITORY, CODESET, MODIFIER):
            if combo & flag:
                name += parts[flag]
        candidates.append(name)
    return candidates
-\r
-\r
-\r
class NullTranslations:
    """Translation object that leaves messages untranslated.

    Every lookup is delegated to the chained fallback when one has been
    added; otherwise the original message (or the appropriate plural
    msgid) is returned.  Subclasses load a real catalog by overriding
    _parse(), which the constructor calls when a file object is given.
    """

    def __init__(self, fp=None):
        self._info = {}
        self._charset = None
        self._output_charset = None
        self._fallback = None
        if fp is None:
            return
        self._parse(fp)

    def _parse(self, fp):
        """Hook for subclasses; this base class reads nothing from fp."""
        pass

    def add_fallback(self, fallback):
        """Append fallback to the end of the fallback chain."""
        if not self._fallback:
            self._fallback = fallback
        else:
            self._fallback.add_fallback(fallback)

    def gettext(self, message):
        """Return the fallback's translation, or message itself."""
        fallback = self._fallback
        if not fallback:
            return message
        return fallback.gettext(message)

    def lgettext(self, message):
        """Like gettext(), delegating to the fallback's lgettext()."""
        fallback = self._fallback
        if not fallback:
            return message
        return fallback.lgettext(message)

    def ngettext(self, msgid1, msgid2, n):
        """Return the fallback's plural lookup, else msgid1 for n == 1
        and msgid2 for any other n."""
        if self._fallback:
            return self._fallback.ngettext(msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def lngettext(self, msgid1, msgid2, n):
        """Like ngettext(), delegating to the fallback's lngettext()."""
        if self._fallback:
            return self._fallback.lngettext(msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def ugettext(self, message):
        """Like gettext(), but the untranslated message is returned
        converted with unicode()."""
        fallback = self._fallback
        if not fallback:
            return unicode(message)
        return fallback.ugettext(message)

    def ungettext(self, msgid1, msgid2, n):
        """Like ngettext(), but the untranslated msgid is returned
        converted with unicode()."""
        if self._fallback:
            return self._fallback.ungettext(msgid1, msgid2, n)
        return unicode(msgid1 if n == 1 else msgid2)

    def info(self):
        """Return the metadata dictionary."""
        return self._info

    def charset(self):
        """Return the catalog charset (None unless one was set)."""
        return self._charset

    def output_charset(self):
        """Return the charset set with set_output_charset(), if any."""
        return self._output_charset

    def set_output_charset(self, charset):
        """Record the charset to be used for encoded results."""
        self._output_charset = charset

    def install(self, unicode=False, names=None):
        """Bind this object's lookup methods into the builtin namespace.

        ``_`` is always installed.  When ``names`` supports the ``in``
        operator, each of 'gettext', 'ngettext', 'lgettext' and
        'lngettext' it contains is installed too.  With ``unicode`` true,
        ``_``/``gettext`` and ``ngettext`` are bound to ugettext() and
        ungettext() instead of gettext() and ngettext().
        """
        import __builtin__
        namespace = __builtin__.__dict__
        namespace['_'] = self.ugettext if unicode else self.gettext
        if not hasattr(names, "__contains__"):
            return
        if "gettext" in names:
            namespace['gettext'] = namespace['_']
        if "ngettext" in names:
            namespace['ngettext'] = (self.ungettext if unicode
                                     else self.ngettext)
        if "lgettext" in names:
            namespace['lgettext'] = self.lgettext
        if "lngettext" in names:
            namespace['lngettext'] = self.lngettext
-\r
-\r
class GNUTranslations(NullTranslations):
    """Translations loaded from a GNU gettext ``.mo`` catalog.

    _parse() fills ``self._catalog``: singular entries are keyed by the
    msgid, plural entries by ``(msgid1, plural_index)``.  When a key is
    missing the lookup methods use the chained fallback, if any, and
    otherwise return the untranslated msgid.
    """
    # Magic number of .mo files.  Written without the Python 2 'L' suffix;
    # the values are the same either way.
    LE_MAGIC = 0x950412de
    BE_MAGIC = 0xde120495

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of ``fp`` and stores every message in ``self._catalog``.
        The metadata entry (the one with an empty msgid) is parsed into
        ``self._info``; its Content-Type line may set the catalog charset
        and its Plural-Forms line may replace the default plural rule.

        Raises IOError when the magic number is wrong or when the header,
        the index tables or a string do not fit inside the data read.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header, which consists of 5 little endian 32
        # bit words.
        self._catalog = catalog = {}
        self.plural = lambda n: int(n != 1) # germanic plural by default
        buf = fp.read()
        buflen = len(buf)
        # A file too short to hold the magic number would make
        # struct.unpack() fail with struct.error; report it like any
        # other corrupt file instead.
        if buflen < 4:
            raise IOError(0, 'File is corrupt', filename)
        # Are we big endian or little endian?
        magic = unpack('<I', buf[:4])[0]
        if magic == self.LE_MAGIC:
            header = '<4I'
            ii = '<II'
        elif magic == self.BE_MAGIC:
            header = '>4I'
            ii = '>II'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # The remaining four header words end at byte 20.
        if buflen < 20:
            raise IOError(0, 'File is corrupt', filename)
        version, msgcount, masteridx, transidx = unpack(header, buf[4:20])
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for i in xrange(0, msgcount):
            # Each index entry is a (length, offset) pair of 32 bit words.
            if masteridx + 8 > buflen or transidx + 8 > buflen:
                raise IOError(0, 'File is corrupt', filename)
            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
            mend = moff + mlen
            tlen, toff = unpack(ii, buf[transidx:transidx+8])
            tend = toff + tlen
            if mend < buflen and tend < buflen:
                msg = buf[moff:mend]
                tmsg = buf[toff:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0:
                # Catalog description
                lastk = None
                for item in tmsg.splitlines():
                    item = item.strip()
                    if not item:
                        continue
                    k = v = None
                    if ':' in item:
                        k, v = item.split(':', 1)
                        k = k.strip().lower()
                        v = v.strip()
                        self._info[k] = v
                        lastk = k
                    elif lastk:
                        # Continuation of the previous header line
                        self._info[lastk] += '\n' + item
                    if k == 'content-type':
                        # A Content-Type without a charset parameter
                        # leaves the charset unset instead of failing
                        # with IndexError.
                        if 'charset=' in v:
                            self._charset = v.split('charset=')[1]
                    elif k == 'plural-forms':
                        v = v.split(';')
                        # A header without a "plural=" field in second
                        # position keeps the default rule instead of
                        # failing with IndexError.
                        if len(v) > 1 and 'plural=' in v[1]:
                            plural = v[1].split('plural=')[1]
                            self.plural = c2py(plural)
            # Note: we unconditionally convert both msgids and msgstrs to
            # Unicode using the character encoding specified in the charset
            # parameter of the Content-Type header.  The gettext documentation
            # strongly encourages msgids to be us-ascii, but some applications
            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
            # traditional gettext applications, the msgid conversion will
            # cause no problems since us-ascii should always be a subset of
            # the charset encoding.  We may want to fall back to 8-bit msgids
            # if the Unicode conversion fails.
            if '\x00' in msg:
                # Plural forms
                msgid1, msgid2 = msg.split('\x00')
                tmsg = tmsg.split('\x00')
                if self._charset:
                    msgid1 = unicode(msgid1, self._charset)
                    tmsg = [unicode(x, self._charset) for x in tmsg]
                # A separate index name keeps the outer loop variable
                # intact.
                for index, form in enumerate(tmsg):
                    catalog[(msgid1, index)] = form
            else:
                if self._charset:
                    msg = unicode(msg, self._charset)
                    tmsg = unicode(tmsg, self._charset)
                catalog[msg] = tmsg
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8

    def gettext(self, message):
        """Return the translation of message, encoded with the output
        charset if set, else with the catalog charset if set."""
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
        # Encode the Unicode tmsg back to an 8-bit string, if possible
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def lgettext(self, message):
        """Return the translation of message, encoded with the output
        charset if set, else with locale.getpreferredencoding()."""
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
            return message
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def ngettext(self, msgid1, msgid2, n):
        """Return the plural form of msgid1 selected by self.plural(n),
        encoded like gettext() results."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def lngettext(self, msgid1, msgid2, n):
        """Return the plural form of msgid1 selected by self.plural(n),
        encoded like lgettext() results."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def ugettext(self, message):
        """Return the catalog entry for message without encoding it; a
        missing message is returned converted with unicode()."""
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.ugettext(message)
            return unicode(message)
        return tmsg

    def ungettext(self, msgid1, msgid2, n):
        """Return the plural catalog entry without encoding it; a missing
        entry yields unicode(msgid1) for n == 1, else unicode(msgid2)."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ungettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = unicode(msgid1)
            else:
                tmsg = unicode(msgid2)
        return tmsg
-\r
-\r
# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
    """Return the path of the .mo file for domain, or None if there is none.

    Catalogs are looked up as
    ``<localedir>/<language>/LC_MESSAGES/<domain>.mo``.

    domain    -- catalog name, without the ``.mo`` suffix.
    localedir -- root directory; defaults to _default_localedir.
    languages -- sequence of language names to try, in order.  When
                 omitted it is taken from the first of the environment
                 variables LANGUAGE, LC_ALL, LC_MESSAGES and LANG that is
                 set and non-empty, split on ':'.  The sequence passed in
                 is never modified.
    all       -- when true, return the list of all existing catalogs
                 (possibly empty) instead of only the first one.

    Each language is expanded with _expand_lang(); the search stops at the
    'C' locale, which is always appended when absent.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if localedir is None:
        localedir = _default_localedir
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            val = os.environ.get(envar)
            if val:
                languages = val.split(':')
                break
    else:
        # Work on a copy: appending 'C' below must not change the
        # caller's sequence.
        languages = list(languages)
    if 'C' not in languages:
        languages.append('C')
    # now normalize and expand the languages
    nelangs = []
    for lang in languages:
        for nelang in _expand_lang(lang):
            if nelang not in nelangs:
                nelangs.append(nelang)
    # select a language
    if all:
        result = []
    else:
        result = None
    for lang in nelangs:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        if os.path.exists(mofile):
            if all:
                result.append(mofile)
            else:
                return mofile
    return result
-\r
-\r
-\r
# Cache of parsed catalogs, keyed by (translation class, absolute .mo path)
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translation object for domain.

    The catalogs are located with find(..., all=1) and parsed with
    ``class_`` (GNUTranslations when omitted).  The first catalog found
    becomes the result and the others are chained to it as fallbacks.  A
    true ``codeset`` is applied to each object as its output charset.

    When no catalog is found, a NullTranslations instance is returned if
    ``fallback`` is true; otherwise IOError(ENOENT, ...) is raised.
    """
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=1)
    if not mofiles:
        if not fallback:
            raise IOError(ENOENT, 'No translation file found for domain', domain)
        return NullTranslations()
    head = None
    for path in mofiles:
        cache_key = (class_, os.path.abspath(path))
        cached = _translations.get(cache_key)
        if cached is None:
            # First use of this catalog: parse it once and remember it.
            with open(path, 'rb') as fp:
                cached = _translations.setdefault(cache_key, class_(fp))
        # Hand out a shallow copy so that the fallback chain and output
        # charset can be set per call; everything else stays shared with
        # the cached object.
        catalog = copy.copy(cached)
        if codeset:
            catalog.set_output_charset(codeset)
        if head is None:
            head = catalog
        else:
            head.add_fallback(catalog)
    return head
-\r
-\r
def install(domain, localedir=None, unicode=False, codeset=None, names=None):
    """Look up the translation for domain and call its install() method.

    The lookup uses fallback=True, so a missing catalog does not raise;
    ``unicode`` and ``names`` are passed on to the object's install().
    """
    translation(domain, localedir, fallback=True,
                codeset=codeset).install(unicode, names)
-\r
-\r
-\r
# a mapping between domains and locale directories, filled in by
# bindtextdomain()
_localedirs = {}
# a mapping between domains and codesets, filled in by
# bind_textdomain_codeset()
_localecodesets = {}
# current global domain, `messages' used for compatibility w/ GNU gettext;
# changed through textdomain()
_current_domain = 'messages'
-\r
-\r
def textdomain(domain=None):
    """Set the global domain when domain is given, and return the
    current global domain."""
    global _current_domain
    if domain is None:
        return _current_domain
    _current_domain = domain
    return domain
-\r
-\r
def bindtextdomain(domain, localedir=None):
    """Bind domain to localedir when one is given, and return the
    directory bound to domain (the default locale directory if none)."""
    if localedir is None:
        return _localedirs.get(domain, _default_localedir)
    _localedirs[domain] = localedir
    return localedir
-\r
-\r
def bind_textdomain_codeset(domain, codeset=None):
    """Bind domain to codeset when one is given, and return the codeset
    bound to domain (None if there is none)."""
    if codeset is None:
        return _localecodesets.get(domain)
    _localecodesets[domain] = codeset
    return codeset
-\r
-\r
def dgettext(domain, message):
    """Translate message with the catalog of domain, using the directory
    and codeset bound to that domain.

    Returns message unchanged when translation() raises IOError.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return message
    else:
        return catalog.gettext(message)
-\r
def ldgettext(domain, message):
    """Like dgettext(), but the lookup goes through the catalog's
    lgettext() method.

    Returns message unchanged when translation() raises IOError.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return message
    else:
        return catalog.lgettext(message)
-\r
def dngettext(domain, msgid1, msgid2, n):
    """Plural lookup in the catalog of domain, using the directory and
    codeset bound to that domain.

    When translation() raises IOError, returns msgid1 for n == 1 and
    msgid2 otherwise.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return msgid1 if n == 1 else msgid2
    else:
        return catalog.ngettext(msgid1, msgid2, n)
-\r
def ldngettext(domain, msgid1, msgid2, n):
    """Like dngettext(), but the lookup goes through the catalog's
    lngettext() method.

    When translation() raises IOError, returns msgid1 for n == 1 and
    msgid2 otherwise.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return msgid1 if n == 1 else msgid2
    else:
        return catalog.lngettext(msgid1, msgid2, n)
-\r
def gettext(message):
    """Translate message in the current global domain via dgettext()."""
    domain = _current_domain
    return dgettext(domain, message)
-\r
def lgettext(message):
    """Translate message in the current global domain via ldgettext()."""
    domain = _current_domain
    return ldgettext(domain, message)
-\r
def ngettext(msgid1, msgid2, n):
    """Plural lookup in the current global domain via dngettext()."""
    domain = _current_domain
    return dngettext(domain, msgid1, msgid2, n)
-\r
def lngettext(msgid1, msgid2, n):
    """Plural lookup in the current global domain via ldngettext()."""
    domain = _current_domain
    return ldngettext(domain, msgid1, msgid2, n)
-\r
-# dcgettext() has been deemed unnecessary and is not implemented.\r
-\r
# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object currently doesn't support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

# Catalog is an alias of translation(), so the usage shown above returns a
# translation object.
Catalog = translation