| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | """Guess the MIME type of a file.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-05-18 16:27:20 +00:00
										 |  |  | This module defines two useful functions: | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | guess_type(url) -- guess the MIME type and encoding of a URL. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-05-18 16:27:20 +00:00
										 |  |  | guess_extension(type) -- guess the extension for a given MIME type. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | It also contains the following, for tuning the behavior: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Data: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | knownfiles -- list of files to parse | 
					
						
							|  |  |  | inited -- flag set when init() has been called | 
					
						
suffix_map -- dictionary mapping suffixes to suffixes
					
						
							|  |  |  | encodings_map -- dictionary mapping suffixes to encodings | 
					
						
							|  |  |  | types_map -- dictionary mapping suffixes to types | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Functions: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | init([files]) -- parse a list of files, default knownfiles | 
					
						
							|  |  |  | read_mime_types(file) -- parse one file, return a dictionary or None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import string | 
					
						
							|  |  |  | import posixpath | 
					
						
							| 
									
										
										
										
											1998-10-12 15:12:28 +00:00
										 |  |  | import urllib | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | knownfiles = [ | 
					
						
							|  |  |  |     "/usr/local/etc/httpd/conf/mime.types", | 
					
						
							|  |  |  |     "/usr/local/lib/netscape/mime.types", | 
					
						
							| 
									
										
										
										
											2000-02-10 17:17:14 +00:00
										 |  |  |     "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2 | 
					
						
							|  |  |  |     "/usr/local/etc/mime.types",                # Apache 1.3 | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | inited = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def guess_type(url): | 
					
						
							|  |  |  |     """Guess the type of a file based on its URL.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Return value is a tuple (type, encoding) where type is None if the | 
					
						
							|  |  |  |     type can't be guessed (no or unknown suffix) or a string of the | 
					
						
							|  |  |  |     form type/subtype, usable for a MIME Content-type header; and | 
					
						
							|  |  |  |     encoding is None for no encoding or the name of the program used | 
					
						
							|  |  |  |     to encode (e.g. compress or gzip).  The mappings are table | 
					
						
							|  |  |  |     driven.  Encoding suffixes are case sensitive; type suffixes are | 
					
						
							|  |  |  |     first tried case sensitive, then case insensitive. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped | 
					
						
							|  |  |  |     to ".tar.gz".  (This is table-driven too, using the dictionary | 
					
						
							| 
									
										
										
										
											1998-05-18 16:05:24 +00:00
										 |  |  |     suffix_map). | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not inited: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         init() | 
					
						
							| 
									
										
										
										
											1998-10-12 15:12:28 +00:00
										 |  |  |     scheme, url = urllib.splittype(url) | 
					
						
							|  |  |  |     if scheme == 'data': | 
					
						
							| 
									
										
										
										
											2000-02-10 17:17:14 +00:00
										 |  |  |         # syntax of data URLs: | 
					
						
							|  |  |  |         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data | 
					
						
							|  |  |  |         # mediatype := [ type "/" subtype ] *( ";" parameter ) | 
					
						
							|  |  |  |         # data      := *urlchar | 
					
						
							|  |  |  |         # parameter := attribute "=" value | 
					
						
							|  |  |  |         # type/subtype defaults to "text/plain" | 
					
						
							|  |  |  |         comma = string.find(url, ',') | 
					
						
							|  |  |  |         if comma < 0: | 
					
						
							|  |  |  |             # bad data URL | 
					
						
							|  |  |  |             return None, None | 
					
						
							|  |  |  |         semi = string.find(url, ';', 0, comma) | 
					
						
							|  |  |  |         if semi >= 0: | 
					
						
							|  |  |  |             type = url[:semi] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             type = url[:comma] | 
					
						
							|  |  |  |         if '=' in type or '/' not in type: | 
					
						
							|  |  |  |             type = 'text/plain' | 
					
						
							|  |  |  |         return type, None               # never compressed, so encoding is None | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     base, ext = posixpath.splitext(url) | 
					
						
							|  |  |  |     while suffix_map.has_key(ext): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         base, ext = posixpath.splitext(base + suffix_map[ext]) | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     if encodings_map.has_key(ext): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         encoding = encodings_map[ext] | 
					
						
							|  |  |  |         base, ext = posixpath.splitext(base) | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         encoding = None | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     if types_map.has_key(ext): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         return types_map[ext], encoding | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     elif types_map.has_key(string.lower(ext)): | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         return types_map[string.lower(ext)], encoding | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         return None, encoding | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-05-18 16:27:20 +00:00
										 |  |  | def guess_extension(type): | 
					
						
							|  |  |  |     """Guess the extension for a file based on its MIME type.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Return value is a string giving a filename extension, including the | 
					
						
							|  |  |  |     leading dot ('.').  The extension is not guaranteed to have been | 
					
						
							| 
									
										
										
										
											1998-05-19 15:15:59 +00:00
										 |  |  |     associated with any particular data stream, but would be mapped to the | 
					
						
							|  |  |  |     MIME type `type' by guess_type().  If no extension can be guessed for | 
					
						
							|  |  |  |     `type', None is returned. | 
					
						
							| 
									
										
										
										
											1998-05-18 16:27:20 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											1998-05-19 15:15:59 +00:00
										 |  |  |     global inited | 
					
						
							|  |  |  |     if not inited: | 
					
						
							|  |  |  |         init() | 
					
						
							| 
									
										
										
										
											1998-05-18 16:27:20 +00:00
										 |  |  |     type = string.lower(type) | 
					
						
							|  |  |  |     for ext, stype in types_map.items(): | 
					
						
							|  |  |  |         if type == stype: | 
					
						
							|  |  |  |             return ext | 
					
						
							|  |  |  |     return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  | def init(files=None): | 
					
						
							|  |  |  |     global inited | 
					
						
							|  |  |  |     for file in files or knownfiles: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         s = read_mime_types(file) | 
					
						
							|  |  |  |         if s: | 
					
						
							|  |  |  |             for key, value in s.items(): | 
					
						
							|  |  |  |                 types_map[key] = value | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     inited = 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def read_mime_types(file): | 
					
						
							|  |  |  |     try: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         f = open(file) | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     except IOError: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         return None | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     map = {} | 
					
						
							|  |  |  |     while 1: | 
					
						
							| 
									
										
										
										
											1998-03-26 21:13:24 +00:00
										 |  |  |         line = f.readline() | 
					
						
							|  |  |  |         if not line: break | 
					
						
							|  |  |  |         words = string.split(line) | 
					
						
							|  |  |  |         for i in range(len(words)): | 
					
						
							|  |  |  |             if words[i][0] == '#': | 
					
						
							|  |  |  |                 del words[i:] | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         if not words: continue | 
					
						
							|  |  |  |         type, suffixes = words[0], words[1:] | 
					
						
							|  |  |  |         for suff in suffixes: | 
					
						
							|  |  |  |             map['.'+suff] = type | 
					
						
							| 
									
										
										
										
											1997-09-30 19:05:50 +00:00
										 |  |  |     f.close() | 
					
						
							|  |  |  |     return map | 
					
						
							|  |  |  | 
 | 
					
						
# Shorthand suffixes and the suffix sequence each one stands for.
# guess_type() replaces a matching suffix (compared case sensitively) with
# its expansion, repeatedly, before it looks up the encoding and the type.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}
					
						
							|  |  |  | 
 | 
					
						
# Suffix -> name of the program used to encode the file.  guess_type()
# matches these case sensitively ('.Z' is upper case on purpose here), and
# on a match strips the suffix before looking up the type.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }
					
						
							|  |  |  | 
 | 
					
						
# Built-in table: suffix (with leading dot) -> MIME type.
# guess_type() looks a suffix up as given and then lower-cased, so the keys
# here are lower case.  init() merges entries read from mime.types files
# into this dictionary, replacing built-in entries with the same suffix.
# guess_extension() returns the first suffix whose type matches while
# iterating over this dictionary.
# NOTE(review): on dict implementations that preserve insertion order the
# order of the entries below therefore affects guess_extension() -- keep it
# stable unless that change is intended.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.js': 'application/x-javascript',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.rdf': 'application/xml',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xsl': 'application/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }