mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 11:14:33 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			301 lines
		
	
	
	
		
			9.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			301 lines
		
	
	
	
		
			9.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Internationalization and localization support.
 | |
| 
 | |
| This module provides internationalization (I18N) and localization (L10N)
 | |
| support for your Python programs by providing an interface to the GNU gettext
 | |
| message catalog library.
 | |
| 
 | |
| I18N refers to the operation by which a program is made aware of multiple
 | |
| languages.  L10N refers to the adaptation of your program, once
 | |
| internationalized, to the local language and cultural habits.
 | |
| 
 | |
| """
 | |
| 
 | |
| # This module represents the integration of work, contributions, feedback, and
 | |
| # suggestions from the following people:
 | |
| #
 | |
| # Martin von Loewis, who wrote the initial implementation of the underlying
 | |
| # C-based libintlmodule (later renamed _gettext), along with a skeletal
 | |
| # gettext.py implementation.
 | |
| #
 | |
| # Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
 | |
| # which also included a pure-Python implementation to read .mo files if
 | |
| # intlmodule wasn't available.
 | |
| #
 | |
| # James Henstridge, who also wrote a gettext.py module, which has some
 | |
| # interesting, but currently unsupported experimental features: the notion of
 | |
| # a Catalog class and instances, and the ability to add to a catalog file via
 | |
| # a Python API.
 | |
| #
 | |
| # Barry Warsaw integrated these modules, wrote the .install() API and code,
 | |
| # and conformed all C and Python code to Python's coding standards.
 | |
| #
 | |
| # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
 | |
| # module.
 | |
| #
 | |
| # TODO:
 | |
| # - Lazy loading of .mo files.  Currently the entire catalog is loaded into
 | |
| #   memory, but that's probably bad for large translated programs.  Instead,
 | |
| #   the lexical sort of original strings in GNU .mo files should be exploited
 | |
| #   to do binary searches and lazy initializations.  Or you might want to use
 | |
| #   the undocumented double-hash algorithm for .mo files with hash tables, but
 | |
| #   you'll need to study the GNU gettext code to do this.
 | |
| #
 | |
| # - Support Solaris .mo file formats.  Unfortunately, we've been unable to
 | |
| #   find this format documented anywhere.
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| import struct
 | |
| from errno import ENOENT
 | |
| 
 | |
| __all__ = ["bindtextdomain","textdomain","gettext","dgettext",
 | |
|            "find","translation","install","Catalog"]
 | |
| 
 | |
| _default_localedir = os.path.join(sys.prefix, 'share', 'locale')
 | |
| 
 | |
| 
 | |
| 
 | |
| def _expand_lang(locale):
 | |
|     from locale import normalize
 | |
|     locale = normalize(locale)
 | |
|     COMPONENT_CODESET   = 1 << 0
 | |
|     COMPONENT_TERRITORY = 1 << 1
 | |
|     COMPONENT_MODIFIER  = 1 << 2
 | |
|     # split up the locale into its base components
 | |
|     mask = 0
 | |
|     pos = locale.find('@')
 | |
|     if pos >= 0:
 | |
|         modifier = locale[pos:]
 | |
|         locale = locale[:pos]
 | |
|         mask |= COMPONENT_MODIFIER
 | |
|     else:
 | |
|         modifier = ''
 | |
|     pos = locale.find('.')
 | |
|     if pos >= 0:
 | |
|         codeset = locale[pos:]
 | |
|         locale = locale[:pos]
 | |
|         mask |= COMPONENT_CODESET
 | |
|     else:
 | |
|         codeset = ''
 | |
|     pos = locale.find('_')
 | |
|     if pos >= 0:
 | |
|         territory = locale[pos:]
 | |
|         locale = locale[:pos]
 | |
|         mask |= COMPONENT_TERRITORY
 | |
|     else:
 | |
|         territory = ''
 | |
|     language = locale
 | |
|     ret = []
 | |
|     for i in range(mask+1):
 | |
|         if not (i & ~mask):  # if all components for this combo exist ...
 | |
|             val = language
 | |
|             if i & COMPONENT_TERRITORY: val += territory
 | |
|             if i & COMPONENT_CODESET:   val += codeset
 | |
|             if i & COMPONENT_MODIFIER:  val += modifier
 | |
|             ret.append(val)
 | |
|     ret.reverse()
 | |
|     return ret
 | |
| 
 | |
| 
 | |
| 
 | |
| class NullTranslations:
 | |
|     def __init__(self, fp=None):
 | |
|         self._info = {}
 | |
|         self._charset = None
 | |
|         if fp:
 | |
|             self._parse(fp)
 | |
| 
 | |
|     def _parse(self, fp):
 | |
|         pass
 | |
| 
 | |
|     def gettext(self, message):
 | |
|         return message
 | |
| 
 | |
|     def ugettext(self, message):
 | |
|         return unicode(message)
 | |
| 
 | |
|     def info(self):
 | |
|         return self._info
 | |
| 
 | |
|     def charset(self):
 | |
|         return self._charset
 | |
| 
 | |
|     def install(self, unicode=0):
 | |
|         import __builtin__
 | |
|         __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
 | |
| 
 | |
| 
 | |
| class GNUTranslations(NullTranslations):
 | |
|     # Magic number of .mo files
 | |
|     LE_MAGIC = 0x950412de
 | |
|     BE_MAGIC = 0xde120495
 | |
| 
 | |
|     def _parse(self, fp):
 | |
|         """Override this method to support alternative .mo formats."""
 | |
|         # We need to & all 32 bit unsigned integers with 0xffffffff for
 | |
|         # portability to 64 bit machines.
 | |
|         MASK = 0xffffffff
 | |
|         unpack = struct.unpack
 | |
|         filename = getattr(fp, 'name', '')
 | |
|         # Parse the .mo file header, which consists of 5 little endian 32
 | |
|         # bit words.
 | |
|         self._catalog = catalog = {}
 | |
|         buf = fp.read()
 | |
|         buflen = len(buf)
 | |
|         # Are we big endian or little endian?
 | |
|         magic = unpack('<i', buf[:4])[0] & MASK
 | |
|         if magic == self.LE_MAGIC:
 | |
|             version, msgcount, masteridx, transidx = unpack('<4i', buf[4:20])
 | |
|             ii = '<ii'
 | |
|         elif magic == self.BE_MAGIC:
 | |
|             version, msgcount, masteridx, transidx = unpack('>4i', buf[4:20])
 | |
|             ii = '>ii'
 | |
|         else:
 | |
|             raise IOError(0, 'Bad magic number', filename)
 | |
|         # more unsigned ints
 | |
|         msgcount &= MASK
 | |
|         masteridx &= MASK
 | |
|         transidx &= MASK
 | |
|         # Now put all messages from the .mo file buffer into the catalog
 | |
|         # dictionary.
 | |
|         for i in xrange(0, msgcount):
 | |
|             mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
 | |
|             moff &= MASK
 | |
|             mend = moff + (mlen & MASK)
 | |
|             tlen, toff = unpack(ii, buf[transidx:transidx+8])
 | |
|             toff &= MASK
 | |
|             tend = toff + (tlen & MASK)
 | |
|             if mend < buflen and tend < buflen:
 | |
|                 tmsg = buf[toff:tend]
 | |
|                 catalog[buf[moff:mend]] = tmsg
 | |
|             else:
 | |
|                 raise IOError(0, 'File is corrupt', filename)
 | |
|             # See if we're looking at GNU .mo conventions for metadata
 | |
|             if mlen == 0 and tmsg.lower().startswith('project-id-version:'):
 | |
|                 # Catalog description
 | |
|                 for item in tmsg.split('\n'):
 | |
|                     item = item.strip()
 | |
|                     if not item:
 | |
|                         continue
 | |
|                     k, v = item.split(':', 1)
 | |
|                     k = k.strip().lower()
 | |
|                     v = v.strip()
 | |
|                     self._info[k] = v
 | |
|                     if k == 'content-type':
 | |
|                         self._charset = v.split('charset=')[1]
 | |
|             # advance to next entry in the seek tables
 | |
|             masteridx += 8
 | |
|             transidx += 8
 | |
| 
 | |
|     def gettext(self, message):
 | |
|         return self._catalog.get(message, message)
 | |
| 
 | |
|     def ugettext(self, message):
 | |
|         tmsg = self._catalog.get(message, message)
 | |
|         return unicode(tmsg, self._charset)
 | |
| 
 | |
| 
 | |
| 
 | |
| # Locate a .mo file using the gettext strategy
 | |
| def find(domain, localedir=None, languages=None):
 | |
|     # Get some reasonable defaults for arguments that were not supplied
 | |
|     if localedir is None:
 | |
|         localedir = _default_localedir
 | |
|     if languages is None:
 | |
|         languages = []
 | |
|         for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
 | |
|             val = os.environ.get(envar)
 | |
|             if val:
 | |
|                 languages = val.split(':')
 | |
|                 break
 | |
|         if 'C' not in languages:
 | |
|             languages.append('C')
 | |
|     # now normalize and expand the languages
 | |
|     nelangs = []
 | |
|     for lang in languages:
 | |
|         for nelang in _expand_lang(lang):
 | |
|             if nelang not in nelangs:
 | |
|                 nelangs.append(nelang)
 | |
|     # select a language
 | |
|     for lang in nelangs:
 | |
|         if lang == 'C':
 | |
|             break
 | |
|         mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
 | |
|         if os.path.exists(mofile):
 | |
|             return mofile
 | |
|     return None
 | |
| 
 | |
| 
 | |
| 
 | |
| # a mapping between absolute .mo file path and Translation object
 | |
| _translations = {}
 | |
| 
 | |
| def translation(domain, localedir=None, languages=None, class_=None):
 | |
|     if class_ is None:
 | |
|         class_ = GNUTranslations
 | |
|     mofile = find(domain, localedir, languages)
 | |
|     if mofile is None:
 | |
|         raise IOError(ENOENT, 'No translation file found for domain', domain)
 | |
|     key = os.path.abspath(mofile)
 | |
|     # TBD: do we need to worry about the file pointer getting collected?
 | |
|     # Avoid opening, reading, and parsing the .mo file after it's been done
 | |
|     # once.
 | |
|     t = _translations.get(key)
 | |
|     if t is None:
 | |
|         t = _translations.setdefault(key, class_(open(mofile, 'rb')))
 | |
|     return t
 | |
| 
 | |
| 
 | |
| 
 | |
| def install(domain, localedir=None, unicode=0):
 | |
|     translation(domain, localedir).install(unicode)
 | |
| 
 | |
| 
 | |
| 
 | |
| # a mapping b/w domains and locale directories
 | |
| _localedirs = {}
 | |
| # current global domain, `messages' used for compatibility w/ GNU gettext
 | |
| _current_domain = 'messages'
 | |
| 
 | |
| 
 | |
| def textdomain(domain=None):
 | |
|     global _current_domain
 | |
|     if domain is not None:
 | |
|         _current_domain = domain
 | |
|     return _current_domain
 | |
| 
 | |
| 
 | |
| def bindtextdomain(domain, localedir=None):
 | |
|     global _localedirs
 | |
|     if localedir is not None:
 | |
|         _localedirs[domain] = localedir
 | |
|     return _localedirs.get(domain, _default_localedir)
 | |
| 
 | |
| 
 | |
| def dgettext(domain, message):
 | |
|     try:
 | |
|         t = translation(domain, _localedirs.get(domain, None))
 | |
|     except IOError:
 | |
|         return message
 | |
|     return t.gettext(message)
 | |
| 
 | |
| 
 | |
| def gettext(message):
 | |
|     return dgettext(_current_domain, message)
 | |
| 
 | |
| 
 | |
| # dcgettext() has been deemed unnecessary and is not implemented.
 | |
| 
 | |
| # James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
 | |
| # was:
 | |
| #
 | |
| #    import gettext
 | |
| #    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
 | |
| #    _ = cat.gettext
 | |
| #    print _('Hello World')
 | |
| 
 | |
| # The resulting catalog object currently don't support access through a
 | |
| # dictionary API, which was supported (but apparently unused) in GNOME
 | |
| # gettext.
 | |
| 
 | |
| Catalog = translation
 | 
