AppPkg/Applications/Python/Python-2.7.10/Lib/gettext.py

   1 """Internationalization and localization support.
   2
   3 This module provides internationalization (I18N) and localization (L10N)
   4 support for your Python programs by providing an interface to the GNU gettext
   5 message catalog library.
   6
   7 I18N refers to the operation by which a program is made aware of multiple
   8 languages.  L10N refers to the adaptation of your program, once
   9 internationalized, to the local language and cultural habits.
  10
  11 """
  12
  13 # This module represents the integration of work, contributions, feedback, and
  14 # suggestions from the following people:
  15 #
  16 # Martin von Loewis, who wrote the initial implementation of the underlying
  17 # C-based libintlmodule (later renamed _gettext), along with a skeletal
  18 # gettext.py implementation.
  19 #
  20 # Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
  21 # which also included a pure-Python implementation to read .mo files if
  22 # intlmodule wasn't available.
  23 #
  24 # James Henstridge, who also wrote a gettext.py module, which has some
  25 # interesting, but currently unsupported experimental features: the notion of
  26 # a Catalog class and instances, and the ability to add to a catalog file via
  27 # a Python API.
  28 #
  29 # Barry Warsaw integrated these modules, wrote the .install() API and code,
  30 # and conformed all C and Python code to Python's coding standards.
  31 #
  32 # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
  33 # module.
  34 #
  35 # J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
  36 #
  37 # TODO:
  38 # - Lazy loading of .mo files.  Currently the entire catalog is loaded into
  39 #   memory, but that's probably bad for large translated programs.  Instead,
  40 #   the lexical sort of original strings in GNU .mo files should be exploited
  41 #   to do binary searches and lazy initializations.  Or you might want to use
  42 #   the undocumented double-hash algorithm for .mo files with hash tables, but
  43 #   you'll need to study the GNU gettext code to do this.
  44 #
  45 # - Support Solaris .mo file formats.  Unfortunately, we've been unable to
  46 #   find this format documented anywhere.
  47
  48
  49 import locale, copy, os, re, struct, sys
  50 from errno import ENOENT
  51
  52
  53 __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
  54            'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
  55            'bind_textdomain_codeset',
  56            'dgettext', 'dngettext', 'gettext', 'lgettext', 'ldgettext',
  57            'ldngettext', 'lngettext', 'ngettext',
  58            ]
  59
  60 _default_localedir = os.path.join(sys.prefix, 'share', 'locale')
  61
  62
  63 def test(condition, true, false):
  64     """
  65     Implements the C expression:
  66
  67       condition ? true : false
  68
  69     Required to correctly interpret plural forms.
  70     """
  71     if condition:
  72         return true
  73     else:
  74         return false
  75
  76
  77 def c2py(plural):
  78     """Gets a C expression as used in PO files for plural forms and returns a
  79     Python lambda function that implements an equivalent expression.
  80     """
  81     # Security check, allow only the "n" identifier
  82     try:
  83         from cStringIO import StringIO
  84     except ImportError:
  85         from StringIO import StringIO
  86     import token, tokenize
  87     tokens = tokenize.generate_tokens(StringIO(plural).readline)
  88     try:
  89         danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
  90     except tokenize.TokenError:
  91         raise ValueError, \
  92               'plural forms expression error, maybe unbalanced parenthesis'
  93     else:
  94         if danger:
  95             raise ValueError, 'plural forms expression could be dangerous'
  96
  97     # Replace some C operators by their Python equivalents
  98     plural = plural.replace('&&', ' and ')
  99     plural = plural.replace('||', ' or ')
 100
 101     expr = re.compile(r'\!([^=])')
 102     plural = expr.sub(' not \\1', plural)
 103
 104     # Regular expression and replacement function used to transform
 105     # "a?b:c" to "test(a,b,c)".
 106     expr = re.compile(r'(.*?)\?(.*?):(.*)')
 107     def repl(x):
 108         return "test(%s, %s, %s)" % (x.group(1), x.group(2),
 109                                      expr.sub(repl, x.group(3)))
 110
 111     # Code to transform the plural expression, taking care of parentheses
 112     stack = ['']
 113     for c in plural:
 114         if c == '(':
 115             stack.append('')
 116         elif c == ')':
 117             if len(stack) == 1:
 118                 # Actually, we never reach this code, because unbalanced
 119                 # parentheses get caught in the security check at the
 120                 # beginning.
 121                 raise ValueError, 'unbalanced parenthesis in plural form'
 122             s = expr.sub(repl, stack.pop())
 123             stack[-1] += '(%s)' % s
 124         else:
 125             stack[-1] += c
 126     plural = expr.sub(repl, stack.pop())
 127
 128     return eval('lambda n: int(%s)' % plural)
 129
 130
 131
 132 def _expand_lang(locale):
 133     from locale import normalize
 134     locale = normalize(locale)
 135     COMPONENT_CODESET   = 1 << 0
 136     COMPONENT_TERRITORY = 1 << 1
 137     COMPONENT_MODIFIER  = 1 << 2
 138     # split up the locale into its base components
 139     mask = 0
 140     pos = locale.find('@')
 141     if pos >= 0:
 142         modifier = locale[pos:]
 143         locale = locale[:pos]
 144         mask |= COMPONENT_MODIFIER
 145     else:
 146         modifier = ''
 147     pos = locale.find('.')
 148     if pos >= 0:
 149         codeset = locale[pos:]
 150         locale = locale[:pos]
 151         mask |= COMPONENT_CODESET
 152     else:
 153         codeset = ''
 154     pos = locale.find('_')
 155     if pos >= 0:
 156         territory = locale[pos:]
 157         locale = locale[:pos]
 158         mask |= COMPONENT_TERRITORY
 159     else:
 160         territory = ''
 161     language = locale
 162     ret = []
 163     for i in range(mask+1):
 164         if not (i & ~mask):  # if all components for this combo exist ...
 165             val = language
 166             if i & COMPONENT_TERRITORY: val += territory
 167             if i & COMPONENT_CODESET:   val += codeset
 168             if i & COMPONENT_MODIFIER:  val += modifier
 169             ret.append(val)
 170     ret.reverse()
 171     return ret
 172
 173
 174
 175 class NullTranslations:
 176     def __init__(self, fp=None):
 177         self._info = {}
 178         self._charset = None
 179         self._output_charset = None
 180         self._fallback = None
 181         if fp is not None:
 182             self._parse(fp)
 183
 184     def _parse(self, fp):
 185         pass
 186
 187     def add_fallback(self, fallback):
 188         if self._fallback:
 189             self._fallback.add_fallback(fallback)
 190         else:
 191             self._fallback = fallback
 192
 193     def gettext(self, message):
 194         if self._fallback:
 195             return self._fallback.gettext(message)
 196         return message
 197
 198     def lgettext(self, message):
 199         if self._fallback:
 200             return self._fallback.lgettext(message)
 201         return message
 202
 203     def ngettext(self, msgid1, msgid2, n):
 204         if self._fallback:
 205             return self._fallback.ngettext(msgid1, msgid2, n)
 206         if n == 1:
 207             return msgid1
 208         else:
 209             return msgid2
 210
 211     def lngettext(self, msgid1, msgid2, n):
 212         if self._fallback:
 213             return self._fallback.lngettext(msgid1, msgid2, n)
 214         if n == 1:
 215             return msgid1
 216         else:
 217             return msgid2
 218
 219     def ugettext(self, message):
 220         if self._fallback:
 221             return self._fallback.ugettext(message)
 222         return unicode(message)
 223
 224     def ungettext(self, msgid1, msgid2, n):
 225         if self._fallback:
 226             return self._fallback.ungettext(msgid1, msgid2, n)
 227         if n == 1:
 228             return unicode(msgid1)
 229         else:
 230             return unicode(msgid2)
 231
 232     def info(self):
 233         return self._info
 234
 235     def charset(self):
 236         return self._charset
 237
 238     def output_charset(self):
 239         return self._output_charset
 240
 241     def set_output_charset(self, charset):
 242         self._output_charset = charset
 243
 244     def install(self, unicode=False, names=None):
 245         import __builtin__
 246         __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
 247         if hasattr(names, "__contains__"):
 248             if "gettext" in names:
 249                 __builtin__.__dict__['gettext'] = __builtin__.__dict__['_']
 250             if "ngettext" in names:
 251                 __builtin__.__dict__['ngettext'] = (unicode and self.ungettext
 252                                                              or self.ngettext)
 253             if "lgettext" in names:
 254                 __builtin__.__dict__['lgettext'] = self.lgettext
 255             if "lngettext" in names:
 256                 __builtin__.__dict__['lngettext'] = self.lngettext
 257
 258
 259 class GNUTranslations(NullTranslations):
 260     # Magic number of .mo files
 261     LE_MAGIC = 0x950412deL
 262     BE_MAGIC = 0xde120495L
 263
 264     def _parse(self, fp):
 265         """Override this method to support alternative .mo formats."""
 266         unpack = struct.unpack
 267         filename = getattr(fp, 'name', '')
 268         # Parse the .mo file header, which consists of 5 little endian 32
 269         # bit words.
 270         self._catalog = catalog = {}
 271         self.plural = lambda n: int(n != 1) # germanic plural by default
 272         buf = fp.read()
 273         buflen = len(buf)
 274         # Are we big endian or little endian?
 275         magic = unpack('<I', buf[:4])[0]
 276         if magic == self.LE_MAGIC:
 277             version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
 278             ii = '<II'
 279         elif magic == self.BE_MAGIC:
 280             version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
 281             ii = '>II'
 282         else:
 283             raise IOError(0, 'Bad magic number', filename)
 284         # Now put all messages from the .mo file buffer into the catalog
 285         # dictionary.
 286         for i in xrange(0, msgcount):
 287             mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
 288             mend = moff + mlen
 289             tlen, toff = unpack(ii, buf[transidx:transidx+8])
 290             tend = toff + tlen
 291             if mend < buflen and tend < buflen:
 292                 msg = buf[moff:mend]
 293                 tmsg = buf[toff:tend]
 294             else:
 295                 raise IOError(0, 'File is corrupt', filename)
 296             # See if we're looking at GNU .mo conventions for metadata
 297             if mlen == 0:
 298                 # Catalog description
 299                 lastk = None
 300                 for item in tmsg.splitlines():
 301                     item = item.strip()
 302                     if not item:
 303                         continue
 304                     k = v = None
 305                     if ':' in item:
 306                         k, v = item.split(':', 1)
 307                         k = k.strip().lower()
 308                         v = v.strip()
 309                         self._info[k] = v
 310                         lastk = k
 311                     elif lastk:
 312                         self._info[lastk] += '\n' + item
 313                     if k == 'content-type':
 314                         self._charset = v.split('charset=')[1]
 315                     elif k == 'plural-forms':
 316                         v = v.split(';')
 317                         plural = v[1].split('plural=')[1]
 318                         self.plural = c2py(plural)
 319             # Note: we unconditionally convert both msgids and msgstrs to
 320             # Unicode using the character encoding specified in the charset
 321             # parameter of the Content-Type header.  The gettext documentation
 322             # strongly encourages msgids to be us-ascii, but some applications
 323             # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
 324             # traditional gettext applications, the msgid conversion will
 325             # cause no problems since us-ascii should always be a subset of
 326             # the charset encoding.  We may want to fall back to 8-bit msgids
 327             # if the Unicode conversion fails.
 328             if '\x00' in msg:
 329                 # Plural forms
 330                 msgid1, msgid2 = msg.split('\x00')
 331                 tmsg = tmsg.split('\x00')
 332                 if self._charset:
 333                     msgid1 = unicode(msgid1, self._charset)
 334                     tmsg = [unicode(x, self._charset) for x in tmsg]
 335                 for i in range(len(tmsg)):
 336                     catalog[(msgid1, i)] = tmsg[i]
 337             else:
 338                 if self._charset:
 339                     msg = unicode(msg, self._charset)
 340                     tmsg = unicode(tmsg, self._charset)
 341                 catalog[msg] = tmsg
 342             # advance to next entry in the seek tables
 343             masteridx += 8
 344             transidx += 8
 345
 346     def gettext(self, message):
 347         missing = object()
 348         tmsg = self._catalog.get(message, missing)
 349         if tmsg is missing:
 350             if self._fallback:
 351                 return self._fallback.gettext(message)
 352             return message
 353         # Encode the Unicode tmsg back to an 8-bit string, if possible
 354         if self._output_charset:
 355             return tmsg.encode(self._output_charset)
 356         elif self._charset:
 357             return tmsg.encode(self._charset)
 358         return tmsg
 359
 360     def lgettext(self, message):
 361         missing = object()
 362         tmsg = self._catalog.get(message, missing)
 363         if tmsg is missing:
 364             if self._fallback:
 365                 return self._fallback.lgettext(message)
 366             return message
 367         if self._output_charset:
 368             return tmsg.encode(self._output_charset)
 369         return tmsg.encode(locale.getpreferredencoding())
 370
 371     def ngettext(self, msgid1, msgid2, n):
 372         try:
 373             tmsg = self._catalog[(msgid1, self.plural(n))]
 374             if self._output_charset:
 375                 return tmsg.encode(self._output_charset)
 376             elif self._charset:
 377                 return tmsg.encode(self._charset)
 378             return tmsg
 379         except KeyError:
 380             if self._fallback:
 381                 return self._fallback.ngettext(msgid1, msgid2, n)
 382             if n == 1:
 383                 return msgid1
 384             else:
 385                 return msgid2
 386
 387     def lngettext(self, msgid1, msgid2, n):
 388         try:
 389             tmsg = self._catalog[(msgid1, self.plural(n))]
 390             if self._output_charset:
 391                 return tmsg.encode(self._output_charset)
 392             return tmsg.encode(locale.getpreferredencoding())
 393         except KeyError:
 394             if self._fallback:
 395                 return self._fallback.lngettext(msgid1, msgid2, n)
 396             if n == 1:
 397                 return msgid1
 398             else:
 399                 return msgid2
 400
 401     def ugettext(self, message):
 402         missing = object()
 403         tmsg = self._catalog.get(message, missing)
 404         if tmsg is missing:
 405             if self._fallback:
 406                 return self._fallback.ugettext(message)
 407             return unicode(message)
 408         return tmsg
 409
 410     def ungettext(self, msgid1, msgid2, n):
 411         try:
 412             tmsg = self._catalog[(msgid1, self.plural(n))]
 413         except KeyError:
 414             if self._fallback:
 415                 return self._fallback.ungettext(msgid1, msgid2, n)
 416             if n == 1:
 417                 tmsg = unicode(msgid1)
 418             else:
 419                 tmsg = unicode(msgid2)
 420         return tmsg
 421
 422
 423 # Locate a .mo file using the gettext strategy
 424 def find(domain, localedir=None, languages=None, all=0):
 425     # Get some reasonable defaults for arguments that were not supplied
 426     if localedir is None:
 427         localedir = _default_localedir
 428     if languages is None:
 429         languages = []
 430         for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
 431             val = os.environ.get(envar)
 432             if val:
 433                 languages = val.split(':')
 434                 break
 435         if 'C' not in languages:
 436             languages.append('C')
 437     # now normalize and expand the languages
 438     nelangs = []
 439     for lang in languages:
 440         for nelang in _expand_lang(lang):
 441             if nelang not in nelangs:
 442                 nelangs.append(nelang)
 443     # select a language
 444     if all:
 445         result = []
 446     else:
 447         result = None
 448     for lang in nelangs:
 449         if lang == 'C':
 450             break
 451         mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
 452         if os.path.exists(mofile):
 453             if all:
 454                 result.append(mofile)
 455             else:
 456                 return mofile
 457     return result
 458
 459
 460
 461 # a mapping between absolute .mo file path and Translation object
 462 _translations = {}
 463
 464 def translation(domain, localedir=None, languages=None,
 465                 class_=None, fallback=False, codeset=None):
 466     if class_ is None:
 467         class_ = GNUTranslations
 468     mofiles = find(domain, localedir, languages, all=1)
 469     if not mofiles:
 470         if fallback:
 471             return NullTranslations()
 472         raise IOError(ENOENT, 'No translation file found for domain', domain)
 473     # Avoid opening, reading, and parsing the .mo file after it's been done
 474     # once.
 475     result = None
 476     for mofile in mofiles:
 477         key = (class_, os.path.abspath(mofile))
 478         t = _translations.get(key)
 479         if t is None:
 480             with open(mofile, 'rb') as fp:
 481                 t = _translations.setdefault(key, class_(fp))
 482         # Copy the translation object to allow setting fallbacks and
 483         # output charset. All other instance data is shared with the
 484         # cached object.
 485         t = copy.copy(t)
 486         if codeset:
 487             t.set_output_charset(codeset)
 488         if result is None:
 489             result = t
 490         else:
 491             result.add_fallback(t)
 492     return result
 493
 494
 495 def install(domain, localedir=None, unicode=False, codeset=None, names=None):
 496     t = translation(domain, localedir, fallback=True, codeset=codeset)
 497     t.install(unicode, names)
 498
 499
 500
 501 # a mapping b/w domains and locale directories
 502 _localedirs = {}
 503 # a mapping b/w domains and codesets
 504 _localecodesets = {}
 505 # current global domain, `messages' used for compatibility w/ GNU gettext
 506 _current_domain = 'messages'
 507
 508
 509 def textdomain(domain=None):
 510     global _current_domain
 511     if domain is not None:
 512         _current_domain = domain
 513     return _current_domain
 514
 515
 516 def bindtextdomain(domain, localedir=None):
 517     global _localedirs
 518     if localedir is not None:
 519         _localedirs[domain] = localedir
 520     return _localedirs.get(domain, _default_localedir)
 521
 522
 523 def bind_textdomain_codeset(domain, codeset=None):
 524     global _localecodesets
 525     if codeset is not None:
 526         _localecodesets[domain] = codeset
 527     return _localecodesets.get(domain)
 528
 529
 530 def dgettext(domain, message):
 531     try:
 532         t = translation(domain, _localedirs.get(domain, None),
 533                         codeset=_localecodesets.get(domain))
 534     except IOError:
 535         return message
 536     return t.gettext(message)
 537
 538 def ldgettext(domain, message):
 539     try:
 540         t = translation(domain, _localedirs.get(domain, None),
 541                         codeset=_localecodesets.get(domain))
 542     except IOError:
 543         return message
 544     return t.lgettext(message)
 545
 546 def dngettext(domain, msgid1, msgid2, n):
 547     try:
 548         t = translation(domain, _localedirs.get(domain, None),
 549                         codeset=_localecodesets.get(domain))
 550     except IOError:
 551         if n == 1:
 552             return msgid1
 553         else:
 554             return msgid2
 555     return t.ngettext(msgid1, msgid2, n)
 556
 557 def ldngettext(domain, msgid1, msgid2, n):
 558     try:
 559         t = translation(domain, _localedirs.get(domain, None),
 560                         codeset=_localecodesets.get(domain))
 561     except IOError:
 562         if n == 1:
 563             return msgid1
 564         else:
 565             return msgid2
 566     return t.lngettext(msgid1, msgid2, n)
 567
 568 def gettext(message):
 569     return dgettext(_current_domain, message)
 570
 571 def lgettext(message):
 572     return ldgettext(_current_domain, message)
 573
 574 def ngettext(msgid1, msgid2, n):
 575     return dngettext(_current_domain, msgid1, msgid2, n)
 576
 577 def lngettext(msgid1, msgid2, n):
 578     return ldngettext(_current_domain, msgid1, msgid2, n)
 579
 580 # dcgettext() has been deemed unnecessary and is not implemented.
 581
 582 # James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
 583 # was:
 584 #
 585 #    import gettext
 586 #    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
 587 #    _ = cat.gettext
 588 #    print _('Hello World')
 589
  590 # The resulting catalog object currently doesn't support access through a
  591 # dictionary API, which was supported (but apparently unused) in GNOME
  592 # gettext.
 593
 594 Catalog = translation