| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | """ Standard "encodings" Package
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Standard Python encoding modules are stored in this package | 
					
						
							|  |  |  |     directory. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     Codec modules must have names corresponding to normalized encoding | 
					
						
							|  |  |  |     names as defined in the normalize_encoding() function below, e.g. | 
					
						
							|  |  |  |     'utf-8' must be implemented by the module 'utf_8.py'. | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Each codec module must export the following interface: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     * getregentry() -> (encoder, decoder, stream_reader, stream_writer) | 
					
						
							|  |  |  |     The getregentry() API must return callable objects which adhere to | 
					
						
							|  |  |  |     the Python Codec Interface Standard. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     In addition, a module may optionally also define the following | 
					
						
							|  |  |  |     APIs which are then used by the package's codec search function: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     * getaliases() -> sequence of encoding name strings to use as aliases | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     Alias names returned by getaliases() must be normalized encoding | 
					
						
							|  |  |  |     names as defined by normalize_encoding(). | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """#"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-15 04:33:54 +00:00
										 |  |  | import codecs, types | 
					
						
							|  |  |  | from . import aliases | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | _cache = {} | 
					
						
							| 
									
										
										
										
											2000-03-20 16:36:48 +00:00
										 |  |  | _unknown = '--unknown--' | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  | _import_tail = ['*'] | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  | _norm_encoding_map = ('                                              . ' | 
					
						
							|  |  |  |                       '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     ' | 
					
						
							|  |  |  |                       ' abcdefghijklmnopqrstuvwxyz                     ' | 
					
						
							|  |  |  |                       '                                                ' | 
					
						
							|  |  |  |                       '                                                ' | 
					
						
							|  |  |  |                       '                ') | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  | _aliases = aliases.aliases | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-09-01 00:45:28 +00:00
										 |  |  | class CodecRegistryError(LookupError, SystemError): | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | def normalize_encoding(encoding): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """ Normalize an encoding name.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Normalization works as follows: all non-alphanumeric | 
					
						
							|  |  |  |         characters except the dot used for Python package names are | 
					
						
							|  |  |  |         collapsed and replaced with a single underscore, e.g. '  -;#' | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  |         becomes '_'. Leading and trailing underscores are removed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Note that encoding names should be ASCII only; if they do use | 
					
						
							|  |  |  |         non-ASCII characters, these must be Latin-1 compatible. | 
					
						
							| 
									
										
										
										
											2002-12-24 18:31:27 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2003-05-16 17:07:51 +00:00
										 |  |  |     # Make sure we have an 8-bit string, because .translate() works | 
					
						
							|  |  |  |     # differently for Unicode strings. | 
					
						
							|  |  |  |     if type(encoding) is types.UnicodeType: | 
					
						
							|  |  |  |         # Note that .encode('latin-1') does *not* use the codec | 
					
						
							|  |  |  |         # registry, so this call doesn't recurse. (See unicodeobject.c | 
					
						
							|  |  |  |         # PyUnicode_AsEncodedString() for details) | 
					
						
							|  |  |  |         encoding = encoding.encode('latin-1') | 
					
						
							|  |  |  |     return '_'.join(encoding.translate(_norm_encoding_map).split()) | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | def search_function(encoding): | 
					
						
							| 
									
										
										
										
											2002-08-08 20:19:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     # Cache lookup | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     entry = _cache.get(encoding, _unknown) | 
					
						
							| 
									
										
										
										
											2000-03-20 16:36:48 +00:00
										 |  |  |     if entry is not _unknown: | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         return entry | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     # Import the module: | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |     # First try to find an alias for the normalized encoding | 
					
						
							|  |  |  |     # name and lookup the module using the aliased name, then try to | 
					
						
							|  |  |  |     # lookup the module using the standard import scheme, i.e. first | 
					
						
							|  |  |  |     # try in the encodings package, then at top-level. | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     # | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |     norm_encoding = normalize_encoding(encoding) | 
					
						
							|  |  |  |     aliased_encoding = _aliases.get(norm_encoding) or \ | 
					
						
							|  |  |  |                        _aliases.get(norm_encoding.replace('.', '_')) | 
					
						
							|  |  |  |     if aliased_encoding is not None: | 
					
						
							|  |  |  |         modnames = [aliased_encoding, | 
					
						
							|  |  |  |                     norm_encoding] | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         modnames = [norm_encoding] | 
					
						
							|  |  |  |     for modname in modnames: | 
					
						
							|  |  |  |         if not modname: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2002-02-11 17:43:46 +00:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2006-02-19 15:22:22 +00:00
										 |  |  |             mod = __import__('encodings.' + modname, | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |                              globals(), locals(), _import_tail) | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  |         except ImportError: | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |             pass | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         mod = None | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         getregentry = mod.getregentry | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         # Not a codec module | 
					
						
							|  |  |  |         mod = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     if mod is None: | 
					
						
							| 
									
										
										
										
											2002-02-11 17:43:46 +00:00
										 |  |  |         # Cache misses | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         _cache[encoding] = None | 
					
						
							| 
									
										
										
										
											2002-08-08 20:19:19 +00:00
										 |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     # Now ask the module for the registry entry | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  |     entry = tuple(getregentry()) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     if len(entry) != 4: | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |         raise CodecRegistryError,\ | 
					
						
							|  |  |  |               'module "%s" (%s) failed to register' % \ | 
					
						
							|  |  |  |               (mod.__name__, mod.__file__) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     for obj in entry: | 
					
						
							|  |  |  |         if not callable(obj): | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |             raise CodecRegistryError,\ | 
					
						
							|  |  |  |                   'incompatible codecs in module "%s" (%s)' % \ | 
					
						
							|  |  |  |                   (mod.__name__, mod.__file__) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  |     # Cache the codec registry entry | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     _cache[encoding] = entry | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Register its aliases (without overwriting previously registered | 
					
						
							|  |  |  |     # aliases) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         codecaliases = mod.getaliases() | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         for alias in codecaliases: | 
					
						
							| 
									
										
										
										
											2004-01-20 09:40:14 +00:00
										 |  |  |             if not _aliases.has_key(alias): | 
					
						
							|  |  |  |                 _aliases[alias] = modname | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Return the registry entry | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     return entry | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Register the search_function in the Python codec registry | 
					
						
							|  |  |  | codecs.register(search_function) |