| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | """ Standard "encodings" Package
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Standard Python encoding modules are stored in this package | 
					
						
							|  |  |  |     directory. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     Codec modules must have names corresponding to normalized encoding | 
					
						
							|  |  |  |     names as defined in the normalize_encoding() function below, e.g. | 
					
						
							|  |  |  |     'utf-8' must be implemented by the module 'utf_8.py'. | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Each codec module must export the following interface: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 |  |  |     * getregentry() -> codecs.CodecInfo object | 
					
						
							|  |  |  |     The getregentry() API must return a CodecInfo object with encoder, decoder, | 
					
						
							|  |  |  |     incrementalencoder, incrementaldecoder, streamwriter and streamreader | 
					
						
							|  |  |  |     attributes which adhere to the Python Codec Interface Standard. | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     In addition, a module may optionally also define the following | 
					
						
							|  |  |  |     APIs which are then used by the package's codec search function: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     * getaliases() -> sequence of encoding name strings to use as aliases | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     Alias names returned by getaliases() must be normalized encoding | 
					
						
							|  |  |  |     names as defined by normalize_encoding(). | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """#"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-08-25 01:52:49 +00:00
										 |  |  | import codecs | 
					
						
							| 
									
										
										
										
											2006-03-15 23:08:13 +00:00
										 |  |  | from encodings import aliases | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | _cache = {} | 
					
						
							| 
									
										
										
										
											2000-03-20 16:36:48 +00:00
										 |  |  | _unknown = '--unknown--' | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  | _import_tail = ['*'] | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  | _norm_encoding_map = ('                                              . ' | 
					
						
							|  |  |  |                       '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     ' | 
					
						
							|  |  |  |                       ' abcdefghijklmnopqrstuvwxyz                     ' | 
					
						
							|  |  |  |                       '                                                ' | 
					
						
							|  |  |  |                       '                                                ' | 
					
						
							|  |  |  |                       '                ') | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  | _aliases = aliases.aliases | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-09-01 00:45:28 +00:00
										 |  |  | class CodecRegistryError(LookupError, SystemError): | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | def normalize_encoding(encoding): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """ Normalize an encoding name.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Normalization works as follows: all non-alphanumeric | 
					
						
							|  |  |  |         characters except the dot used for Python package names are | 
					
						
							|  |  |  |         collapsed and replaced with a single underscore, e.g. '  -;#' | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  |         becomes '_'. Leading and trailing underscores are removed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Note that encoding names should be ASCII only; if they do use | 
					
						
							|  |  |  |         non-ASCII characters, these must be Latin-1 compatible. | 
					
						
							| 
									
										
										
										
											2002-12-24 18:31:27 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  |     # Make sure we have an 8-bit string, because .translate() works | 
					
						
							|  |  |  |     # differently for Unicode strings. | 
					
						
							| 
									
										
										
										
											2006-08-25 01:52:49 +00:00
										 |  |  |     if isinstance(encoding, unicode): | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  |         # Note that .encode('latin-1') does *not* use the codec | 
					
						
							|  |  |  |         # registry, so this call doesn't recurse. (See unicodeobject.c | 
					
						
							|  |  |  |         # PyUnicode_AsEncodedString() for details) | 
					
						
							|  |  |  |         encoding = encoding.encode('latin-1') | 
					
						
							|  |  |  |     return '_'.join(encoding.translate(_norm_encoding_map).split()) | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | def search_function(encoding): | 
					
						
							| 
									
										
										
										
											2002-08-08 20:19:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     # Cache lookup | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     entry = _cache.get(encoding, _unknown) | 
					
						
							| 
									
										
										
										
											2000-03-20 16:36:48 +00:00
										 |  |  |     if entry is not _unknown: | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         return entry | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     # Import the module: | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |     # First try to find an alias for the normalized encoding | 
					
						
							|  |  |  |     # name and lookup the module using the aliased name, then try to | 
					
						
							|  |  |  |     # lookup the module using the standard import scheme, i.e. first | 
					
						
							|  |  |  |     # try in the encodings package, then at top-level. | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     # | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |     norm_encoding = normalize_encoding(encoding) | 
					
						
							|  |  |  |     aliased_encoding = _aliases.get(norm_encoding) or \ | 
					
						
							|  |  |  |                        _aliases.get(norm_encoding.replace('.', '_')) | 
					
						
							|  |  |  |     if aliased_encoding is not None: | 
					
						
							|  |  |  |         modnames = [aliased_encoding, | 
					
						
							|  |  |  |                     norm_encoding] | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         modnames = [norm_encoding] | 
					
						
							|  |  |  |     for modname in modnames: | 
					
						
							| 
									
										
										
										
											2006-09-30 11:22:28 +00:00
										 |  |  |         if not modname or '.' in modname: | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2002-02-11 17:43:46 +00:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2007-02-15 22:54:39 +00:00
										 |  |  |             # Import equivalent to `` from .modname import *``. | 
					
						
							|  |  |  |             # '*' is used so that __import__ returns the desired module and not | 
					
						
							|  |  |  |             # 'encodings' itself. | 
					
						
							|  |  |  |             mod = __import__(modname, globals(), locals(), ['*'], 1) | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  |         except ImportError: | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |             pass | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         mod = None | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         getregentry = mod.getregentry | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         # Not a codec module | 
					
						
							|  |  |  |         mod = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     if mod is None: | 
					
						
							| 
									
										
										
										
											2002-02-11 17:43:46 +00:00
										 |  |  |         # Cache misses | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         _cache[encoding] = None | 
					
						
							| 
									
										
										
										
											2002-08-08 20:19:19 +00:00
										 |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     # Now ask the module for the registry entry | 
					
						
							| 
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 |  |  |     entry = getregentry() | 
					
						
							|  |  |  |     if not isinstance(entry, codecs.CodecInfo): | 
					
						
							|  |  |  |         if not 4 <= len(entry) <= 7: | 
					
						
							| 
									
										
										
										
											2006-03-15 18:08:37 +00:00
										 |  |  |             raise CodecRegistryError,\ | 
					
						
							|  |  |  |                  'module "%s" (%s) failed to register' % \ | 
					
						
							|  |  |  |                   (mod.__name__, mod.__file__) | 
					
						
							| 
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 |  |  |         if not callable(entry[0]) or \ | 
					
						
							|  |  |  |            not callable(entry[1]) or \ | 
					
						
							|  |  |  |            (entry[2] is not None and not callable(entry[2])) or \ | 
					
						
							|  |  |  |            (entry[3] is not None and not callable(entry[3])) or \ | 
					
						
							|  |  |  |            (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \ | 
					
						
							|  |  |  |            (len(entry) > 5 and entry[5] is not None and not callable(entry[5])): | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |             raise CodecRegistryError,\ | 
					
						
							| 
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 |  |  |                 'incompatible codecs in module "%s" (%s)' % \ | 
					
						
							|  |  |  |                 (mod.__name__, mod.__file__) | 
					
						
							|  |  |  |         if len(entry)<7 or entry[6] is None: | 
					
						
							|  |  |  |             entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],) | 
					
						
							|  |  |  |         entry = codecs.CodecInfo(*entry) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  |     # Cache the codec registry entry | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     _cache[encoding] = entry | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Register its aliases (without overwriting previously registered | 
					
						
							|  |  |  |     # aliases) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         codecaliases = mod.getaliases() | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         for alias in codecaliases: | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |             if not _aliases.has_key(alias): | 
					
						
							|  |  |  |                 _aliases[alias] = modname | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Return the registry entry | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     return entry | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Register the search_function in the Python codec registry | 
					
						
							|  |  |  | codecs.register(search_function) |