| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | """ Standard "encodings" Package
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Standard Python encoding modules are stored in this package | 
					
						
							|  |  |  |     directory. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     Codec modules must have names corresponding to normalized encoding | 
					
						
							|  |  |  |     names as defined in the normalize_encoding() function below, e.g. | 
					
						
							|  |  |  |     'utf-8' must be implemented by the module 'utf_8.py'. | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Each codec module must export the following interface: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     * getregentry() -> (encoder, decoder, stream_reader, stream_writer) | 
					
						
							|  |  |  |     The getregentry() API must return callable objects which adhere to | 
					
						
							|  |  |  |     the Python Codec Interface Standard. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     In addition, a module may optionally also define the following | 
					
						
							|  |  |  |     APIs which are then used by the package's codec search function: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     * getaliases() -> sequence of encoding name strings to use as aliases | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     Alias names returned by getaliases() must be normalized encoding | 
					
						
							|  |  |  |     names as defined by normalize_encoding(). | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """#"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | import codecs, exceptions, re | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | _cache = {} | 
					
						
							| 
									
										
										
										
											2000-03-20 16:36:48 +00:00
										 |  |  | _unknown = '--unknown--' | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  | _import_tail = ['*'] | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | _norm_encoding_RE = re.compile('[^a-zA-Z0-9.]') | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  | class CodecRegistryError(exceptions.LookupError, | 
					
						
							|  |  |  |                          exceptions.SystemError): | 
					
						
							|  |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  | def normalize_encoding(encoding): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """ Normalize an encoding name.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Normalization works as follows: all non-alphanumeric | 
					
						
							|  |  |  |         characters except the dot used for Python package names are | 
					
						
							|  |  |  |         collapsed and replaced with a single underscore, e.g. '  -;#' | 
					
						
							|  |  |  |         becomes '_'. | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     return '_'.join(_norm_encoding_RE.split(encoding)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | def search_function(encoding): | 
					
						
							| 
									
										
										
										
											2002-08-08 20:19:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     # Cache lookup | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     entry = _cache.get(encoding, _unknown) | 
					
						
							| 
									
										
										
										
											2000-03-20 16:36:48 +00:00
										 |  |  |     if entry is not _unknown: | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         return entry | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     # Import the module: | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # First look in the encodings package, then try to lookup the | 
					
						
							|  |  |  |     # encoding in the aliases mapping and retry the import using the | 
					
						
							|  |  |  |     # default import module lookup scheme with the alias name. | 
					
						
							|  |  |  |     # | 
					
						
							| 
									
										
										
										
											2002-10-04 11:45:38 +00:00
										 |  |  |     modname = normalize_encoding(encoding) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |         mod = __import__('encodings.' + modname, | 
					
						
							|  |  |  |                          globals(), locals(), _import_tail) | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  |     except ImportError: | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |         import aliases | 
					
						
							| 
									
										
										
										
											2002-10-04 20:49:05 +00:00
										 |  |  |         modname = (aliases.aliases.get(modname) or | 
					
						
							|  |  |  |                    aliases.aliases.get(modname.replace('.', '_')) or | 
					
						
							|  |  |  |                    modname) | 
					
						
							| 
									
										
										
										
											2002-02-11 17:43:46 +00:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  |             mod = __import__(modname, globals(), locals(), _import_tail) | 
					
						
							|  |  |  |         except ImportError: | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |             mod = None | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         getregentry = mod.getregentry | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         # Not a codec module | 
					
						
							|  |  |  |         mod = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |     if mod is None: | 
					
						
							| 
									
										
										
										
											2002-02-11 17:43:46 +00:00
										 |  |  |         # Cache misses | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         _cache[encoding] = None | 
					
						
							| 
									
										
										
										
											2002-08-08 20:19:19 +00:00
										 |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     # Now ask the module for the registry entry | 
					
						
							| 
									
										
										
										
											2002-07-29 14:05:24 +00:00
										 |  |  |     entry = tuple(getregentry()) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     if len(entry) != 4: | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |         raise CodecRegistryError,\ | 
					
						
							|  |  |  |               'module "%s" (%s) failed to register' % \ | 
					
						
							|  |  |  |               (mod.__name__, mod.__file__) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     for obj in entry: | 
					
						
							|  |  |  |         if not callable(obj): | 
					
						
							| 
									
										
										
										
											2001-09-19 11:52:07 +00:00
										 |  |  |             raise CodecRegistryError,\ | 
					
						
							|  |  |  |                   'incompatible codecs in module "%s" (%s)' % \ | 
					
						
							|  |  |  |                   (mod.__name__, mod.__file__) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  |     # Cache the codec registry entry | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     _cache[encoding] = entry | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Register its aliases (without overwriting previously registered | 
					
						
							|  |  |  |     # aliases) | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         codecaliases = mod.getaliases() | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     else: | 
					
						
							| 
									
										
										
										
											2002-02-10 21:36:20 +00:00
										 |  |  |         import aliases | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |         for alias in codecaliases: | 
					
						
							| 
									
										
										
										
											2000-12-12 14:45:35 +00:00
										 |  |  |             if not aliases.aliases.has_key(alias): | 
					
						
							|  |  |  |                 aliases.aliases[alias] = modname | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Return the registry entry | 
					
						
							| 
									
										
										
										
											2000-03-10 23:17:24 +00:00
										 |  |  |     return entry | 
					
						
							|  |  |  | 
 | 
					
						
# Register the search_function in the Python codec registry.  This is
# a module-level statement, so it runs when the package is imported.
codecs.register(search_function)