mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	Applying proposed patch for bug #474583, optional support for
non-standard but common types. Including Martin's suggestion to add rejected non-standard types from patch #438790. Specifically, guess_type(), guess_extension(): Both the functions and the methods grow an optional "strict" flag, defaulting to true, which determines whether to recognize non-standard, but commonly found types or not. Also, I sorted, reformatted, and culled duplicates from the big types_map dictionary. Note that there are a few non-equivalent duplicates (e.g. .cdf and .xls) for which the first will just get thrown away. I didn't remove those though. Finally, use of the module as a script has grown the -l and -e options to toggle strictness and to do guess_extension(), respectively. Doc and unittest updates too.
This commit is contained in:
		
							parent
							
								
									9cd0efcee9
								
							
						
					
					
						commit
						107771a228
					
				
					 3 changed files with 263 additions and 148 deletions
				
			
		|  | @ -8,10 +8,10 @@ | ||||||
| 
 | 
 | ||||||
| \indexii{MIME}{content type} | \indexii{MIME}{content type} | ||||||
| 
 | 
 | ||||||
| The \module{mimetypes} converts between a filename or URL and the MIME | The \module{mimetypes} module converts between a filename or URL and | ||||||
| type associated with the filename extension.  Conversions are provided  | the MIME type associated with the filename extension.  Conversions are | ||||||
| from filename to MIME type and from MIME type to filename extension; | provided from filename to MIME type and from MIME type to filename | ||||||
| encodings are not supported for the later conversion. | extension; encodings are not supported for the latter conversion. | ||||||
| 
 | 
 | ||||||
| The module provides one class and a number of convenience functions. | The module provides one class and a number of convenience functions. | ||||||
| The functions are the normal interface to this module, but some | The functions are the normal interface to this module, but some | ||||||
|  | @ -23,22 +23,31 @@ module.  If the module has not been initialized, they will call | ||||||
| sets up. | sets up. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| \begin{funcdesc}{guess_type}{filename} | \begin{funcdesc}{guess_type}{filename\optional{, strict}} | ||||||
| Guess the type of a file based on its filename or URL, given by | Guess the type of a file based on its filename or URL, given by | ||||||
| \var{filename}.  The return value is a tuple \code{(\var{type}, | \var{filename}.  The return value is a tuple \code{(\var{type}, | ||||||
| \var{encoding})} where \var{type} is \code{None} if the type can't be | \var{encoding})} where \var{type} is \code{None} if the type can't be | ||||||
| guessed (no or unknown suffix) or a string of the form | guessed (missing or unknown suffix) or a string of the form | ||||||
| \code{'\var{type}/\var{subtype}'}, usable for a MIME | \code{'\var{type}/\var{subtype}'}, usable for a MIME | ||||||
| \mailheader{content-type} header\indexii{MIME}{headers}; and encoding | \mailheader{content-type} header\indexii{MIME}{headers}. | ||||||
| is \code{None} for no encoding or the name of the program used to | 
 | ||||||
| encode (e.g. \program{compress} or \program{gzip}).  The encoding is | \var{encoding} is \code{None} for no encoding or the name of the | ||||||
| suitable for use as a \mailheader{Content-Encoding} header, \emph{not} | program used to encode (e.g. \program{compress} or \program{gzip}). | ||||||
| as a \mailheader{Content-Transfer-Encoding} header.  The mappings are | The encoding is suitable for use as a \mailheader{Content-Encoding} | ||||||
| table driven.  Encoding suffixes are case sensitive; type suffixes are | header, \emph{not} as a \mailheader{Content-Transfer-Encoding} header. | ||||||
| first tried case sensitive, then case insensitive. | The mappings are table driven.  Encoding suffixes are case sensitive; | ||||||
|  | type suffixes are first tried case sensitively, then case | ||||||
|  | insensitively. | ||||||
|  | 
 | ||||||
|  | Optional \var{strict} is a flag specifying whether the list of known | ||||||
|  | MIME types is limited to only the official types \ulink{registered | ||||||
|  | with IANA}{http://www.isi.edu/in-notes/iana/assignments/media-types}. | ||||||
|  | When \var{strict} is true (the default), only the | ||||||
|  | IANA types are supported; when \var{strict} is false, some additional | ||||||
|  | non-standard but commonly used MIME types are also recognized. | ||||||
| \end{funcdesc} | \end{funcdesc} | ||||||
| 
 | 
 | ||||||
| \begin{funcdesc}{guess_extension}{type} | \begin{funcdesc}{guess_extension}{type\optional{, strict}} | ||||||
| Guess the extension for a file based on its MIME type, given by | Guess the extension for a file based on its MIME type, given by | ||||||
| \var{type}. | \var{type}. | ||||||
| The return value is a string giving a filename extension, including the | The return value is a string giving a filename extension, including the | ||||||
|  | @ -46,6 +55,9 @@ leading dot (\character{.}).  The extension is not guaranteed to have been | ||||||
| associated with any particular data stream, but would be mapped to the  | associated with any particular data stream, but would be mapped to the  | ||||||
| MIME type \var{type} by \function{guess_type()}.  If no extension can | MIME type \var{type} by \function{guess_type()}.  If no extension can | ||||||
| be guessed for \var{type}, \code{None} is returned. | be guessed for \var{type}, \code{None} is returned. | ||||||
|  | 
 | ||||||
|  | Optional \var{strict} has the same meaning as with the | ||||||
|  | \function{guess_type()} function. | ||||||
| \end{funcdesc} | \end{funcdesc} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -98,6 +110,11 @@ Dictionary mapping filename extensions to encoding types. | ||||||
| Dictionary mapping filename extensions to MIME types. | Dictionary mapping filename extensions to MIME types. | ||||||
| \end{datadesc} | \end{datadesc} | ||||||
| 
 | 
 | ||||||
|  | \begin{datadesc}{common_types} | ||||||
|  | Dictionary mapping filename extensions to non-standard, but commonly | ||||||
|  | found MIME types. | ||||||
|  | \end{datadesc} | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| The \class{MimeTypes} class may be useful for applications which may | The \class{MimeTypes} class may be useful for applications which may | ||||||
| want more than one MIME-type database: | want more than one MIME-type database: | ||||||
|  | @ -144,12 +161,18 @@ that of the \refmodule{mimetypes} module. | ||||||
|   module. |   module. | ||||||
| \end{datadesc} | \end{datadesc} | ||||||
| 
 | 
 | ||||||
| \begin{methoddesc}{guess_extension}{type} | \begin{datadesc}{common_types} | ||||||
|  |   Dictionary mapping filename extensions to non-standard, but commonly | ||||||
|  |   found MIME types.  This is initially a copy of the global | ||||||
|  |   \code{common_types} defined in the module. | ||||||
|  | \end{datadesc} | ||||||
|  | 
 | ||||||
|  | \begin{methoddesc}{guess_extension}{type\optional{, strict}} | ||||||
|   Similar to the \function{guess_extension()} function, using the |   Similar to the \function{guess_extension()} function, using the | ||||||
|   tables stored as part of the object. |   tables stored as part of the object. | ||||||
| \end{methoddesc} | \end{methoddesc} | ||||||
| 
 | 
 | ||||||
| \begin{methoddesc}{guess_type}{url} | \begin{methoddesc}{guess_type}{url\optional{, strict}} | ||||||
|   Similar to the \function{guess_type()} function, using the tables |   Similar to the \function{guess_type()} function, using the tables | ||||||
|   stored as part of the object. |   stored as part of the object. | ||||||
| \end{methoddesc} | \end{methoddesc} | ||||||
|  |  | ||||||
							
								
								
									
										344
									
								
								Lib/mimetypes.py
									
										
									
									
									
								
							
							
						
						
									
										344
									
								
								Lib/mimetypes.py
									
										
									
									
									
								
							|  | @ -2,9 +2,9 @@ | ||||||
| 
 | 
 | ||||||
| This module defines two useful functions: | This module defines two useful functions: | ||||||
| 
 | 
 | ||||||
| guess_type(url) -- guess the MIME type and encoding of a URL. | guess_type(url, strict=1) -- guess the MIME type and encoding of a URL. | ||||||
| 
 | 
 | ||||||
| guess_extension(type) -- guess the extension for a given MIME type. | guess_extension(type, strict=1) -- guess the extension for a given MIME type. | ||||||
| 
 | 
 | ||||||
| It also contains the following, for tuning the behavior: | It also contains the following, for tuning the behavior: | ||||||
| 
 | 
 | ||||||
|  | @ -21,6 +21,16 @@ | ||||||
| init([files]) -- parse a list of files, default knownfiles | init([files]) -- parse a list of files, default knownfiles | ||||||
| read_mime_types(file) -- parse one file, return a dictionary or None | read_mime_types(file) -- parse one file, return a dictionary or None | ||||||
| 
 | 
 | ||||||
|  | When run as a script, the following command line options are recognized: | ||||||
|  | 
 | ||||||
|  | Usage: mimetypes.py [options] type | ||||||
|  | Options: | ||||||
|  |     --help / -h       -- print this message and exit | ||||||
|  |     --lenient / -l    -- additionally search some common, but non-standard | ||||||
|  |                          types. | ||||||
|  |     --extension / -e  -- guess extension instead of type | ||||||
|  | 
 | ||||||
|  | More than one type argument may be given. | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| import os | import os | ||||||
|  | @ -53,10 +63,11 @@ def __init__(self, filenames=()): | ||||||
|         self.encodings_map = encodings_map.copy() |         self.encodings_map = encodings_map.copy() | ||||||
|         self.suffix_map = suffix_map.copy() |         self.suffix_map = suffix_map.copy() | ||||||
|         self.types_map = types_map.copy() |         self.types_map = types_map.copy() | ||||||
|  |         self.common_types = common_types.copy() | ||||||
|         for name in filenames: |         for name in filenames: | ||||||
|             self.read(name) |             self.read(name) | ||||||
| 
 | 
 | ||||||
|     def guess_type(self, url): |     def guess_type(self, url, strict=1): | ||||||
|         """Guess the type of a file based on its URL. |         """Guess the type of a file based on its URL. | ||||||
| 
 | 
 | ||||||
|         Return value is a tuple (type, encoding) where type is None if |         Return value is a tuple (type, encoding) where type is None if | ||||||
|  | @ -71,6 +82,9 @@ def guess_type(self, url): | ||||||
|         The suffixes .tgz, .taz and .tz (case sensitive!) are all |         The suffixes .tgz, .taz and .tz (case sensitive!) are all | ||||||
|         mapped to '.tar.gz'.  (This is table-driven too, using the |         mapped to '.tar.gz'.  (This is table-driven too, using the | ||||||
|         dictionary suffix_map.) |         dictionary suffix_map.) | ||||||
|  | 
 | ||||||
|  |         Optional `strict' argument when false adds a bunch of commonly found, | ||||||
|  |         but non-standard types. | ||||||
|         """ |         """ | ||||||
|         scheme, url = urllib.splittype(url) |         scheme, url = urllib.splittype(url) | ||||||
|         if scheme == 'data': |         if scheme == 'data': | ||||||
|  | @ -101,14 +115,21 @@ def guess_type(self, url): | ||||||
|         else: |         else: | ||||||
|             encoding = None |             encoding = None | ||||||
|         types_map = self.types_map |         types_map = self.types_map | ||||||
|  |         common_types = self.common_types | ||||||
|         if types_map.has_key(ext): |         if types_map.has_key(ext): | ||||||
|             return types_map[ext], encoding |             return types_map[ext], encoding | ||||||
|         elif types_map.has_key(ext.lower()): |         elif types_map.has_key(ext.lower()): | ||||||
|             return types_map[ext.lower()], encoding |             return types_map[ext.lower()], encoding | ||||||
|  |         elif strict: | ||||||
|  |             return None, encoding | ||||||
|  |         elif common_types.has_key(ext): | ||||||
|  |             return common_types[ext], encoding | ||||||
|  |         elif common_types.has_key(ext.lower()): | ||||||
|  |             return common_types[ext.lower()], encoding | ||||||
|         else: |         else: | ||||||
|             return None, encoding |             return None, encoding | ||||||
| 
 | 
 | ||||||
|     def guess_extension(self, type): |     def guess_extension(self, type, strict=1): | ||||||
|         """Guess the extension for a file based on its MIME type. |         """Guess the extension for a file based on its MIME type. | ||||||
| 
 | 
 | ||||||
|         Return value is a string giving a filename extension, |         Return value is a string giving a filename extension, | ||||||
|  | @ -117,11 +138,18 @@ def guess_extension(self, type): | ||||||
|         stream, but would be mapped to the MIME type `type' by |         stream, but would be mapped to the MIME type `type' by | ||||||
|         guess_type().  If no extension can be guessed for `type', None |         guess_type().  If no extension can be guessed for `type', None | ||||||
|         is returned. |         is returned. | ||||||
|  | 
 | ||||||
|  |         Optional `strict' argument when false adds a bunch of commonly found, | ||||||
|  |         but non-standard types. | ||||||
|         """ |         """ | ||||||
|         type = type.lower() |         type = type.lower() | ||||||
|         for ext, stype in self.types_map.items(): |         for ext, stype in self.types_map.items(): | ||||||
|             if type == stype: |             if type == stype: | ||||||
|                 return ext |                 return ext | ||||||
|  |         if not strict: | ||||||
|  |             for ext, stype in common_types.items(): | ||||||
|  |                 if type == stype: | ||||||
|  |                     return ext | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|     def read(self, filename): |     def read(self, filename): | ||||||
|  | @ -149,7 +177,7 @@ def readfp(self, fp): | ||||||
|                 map['.' + suff] = type |                 map['.' + suff] = type | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def guess_type(url): | def guess_type(url, strict=1): | ||||||
|     """Guess the type of a file based on its URL. |     """Guess the type of a file based on its URL. | ||||||
| 
 | 
 | ||||||
|     Return value is a tuple (type, encoding) where type is None if the |     Return value is a tuple (type, encoding) where type is None if the | ||||||
|  | @ -163,12 +191,15 @@ def guess_type(url): | ||||||
|     The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped |     The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped | ||||||
|     to ".tar.gz".  (This is table-driven too, using the dictionary |     to ".tar.gz".  (This is table-driven too, using the dictionary | ||||||
|     suffix_map). |     suffix_map). | ||||||
|  | 
 | ||||||
|  |     Optional `strict' argument when false adds a bunch of commonly found, but | ||||||
|  |     non-standard types. | ||||||
|     """ |     """ | ||||||
|     init() |     init() | ||||||
|     return guess_type(url) |     return guess_type(url, strict) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def guess_extension(type): | def guess_extension(type, strict=1): | ||||||
|     """Guess the extension for a file based on its MIME type. |     """Guess the extension for a file based on its MIME type. | ||||||
| 
 | 
 | ||||||
|     Return value is a string giving a filename extension, including the |     Return value is a string giving a filename extension, including the | ||||||
|  | @ -176,14 +207,17 @@ def guess_extension(type): | ||||||
|     associated with any particular data stream, but would be mapped to the |     associated with any particular data stream, but would be mapped to the | ||||||
|     MIME type `type' by guess_type().  If no extension can be guessed for |     MIME type `type' by guess_type().  If no extension can be guessed for | ||||||
|     `type', None is returned. |     `type', None is returned. | ||||||
|  | 
 | ||||||
|  |     Optional `strict' argument when false adds a bunch of commonly found, | ||||||
|  |     but non-standard types. | ||||||
|     """ |     """ | ||||||
|     init() |     init() | ||||||
|     return guess_extension(type) |     return guess_extension(type, strict) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def init(files=None): | def init(files=None): | ||||||
|     global guess_extension, guess_type |     global guess_extension, guess_type | ||||||
|     global suffix_map, types_map, encodings_map |     global suffix_map, types_map, encodings_map, common_types | ||||||
|     global inited |     global inited | ||||||
|     inited = 1 |     inited = 1 | ||||||
|     db = MimeTypes() |     db = MimeTypes() | ||||||
|  | @ -197,6 +231,7 @@ def init(files=None): | ||||||
|     types_map = db.types_map |     types_map = db.types_map | ||||||
|     guess_extension = db.guess_extension |     guess_extension = db.guess_extension | ||||||
|     guess_type = db.guess_type |     guess_type = db.guess_type | ||||||
|  |     common_types = db.common_types | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def read_mime_types(file): | def read_mime_types(file): | ||||||
|  | @ -223,133 +258,178 @@ def read_mime_types(file): | ||||||
| # Before adding new types, make sure they are either registered with IANA, at | # Before adding new types, make sure they are either registered with IANA, at | ||||||
| # http://www.isi.edu/in-notes/iana/assignments/media-types | # http://www.isi.edu/in-notes/iana/assignments/media-types | ||||||
| # or extensions, i.e. using the x- prefix | # or extensions, i.e. using the x- prefix | ||||||
|  | 
 | ||||||
|  | # If you add to these, please keep them sorted! | ||||||
| types_map = { | types_map = { | ||||||
|     '.a': 'application/octet-stream', |     '.a'      : 'application/octet-stream', | ||||||
|     '.ai': 'application/postscript', |     '.ai'     : 'application/postscript', | ||||||
|     '.aif': 'audio/x-aiff', |     '.aif'    : 'audio/x-aiff', | ||||||
|     '.aifc': 'audio/x-aiff', |     '.aifc'   : 'audio/x-aiff', | ||||||
|     '.aiff': 'audio/x-aiff', |     '.aiff'   : 'audio/x-aiff', | ||||||
|     '.au': 'audio/basic', |     '.au'     : 'audio/basic', | ||||||
|     '.avi': 'video/x-msvideo', |     '.avi'    : 'video/x-msvideo', | ||||||
|     '.bcpio': 'application/x-bcpio', |     '.bat'    : 'text/plain', | ||||||
|     '.bin': 'application/octet-stream', |     '.bcpio'  : 'application/x-bcpio', | ||||||
|     '.bmp': 'image/x-ms-bmp', |     '.bin'    : 'application/octet-stream', | ||||||
|     '.cdf': 'application/x-netcdf', |     '.bmp'    : 'image/x-ms-bmp', | ||||||
|     '.cpio': 'application/x-cpio', |     '.c'      : 'text/plain', | ||||||
|     '.csh': 'application/x-csh', |     # Duplicates :( | ||||||
|     '.css': 'text/css', |     '.cdf'    : 'application/x-cdf', | ||||||
|     '.dll': 'application/octet-stream', |     '.cdf'    : 'application/x-netcdf', | ||||||
|     '.doc': 'application/msword', |     '.cpio'   : 'application/x-cpio', | ||||||
|     '.dvi': 'application/x-dvi', |     '.csh'    : 'application/x-csh', | ||||||
|     '.exe': 'application/octet-stream', |     '.css'    : 'text/css', | ||||||
|     '.eps': 'application/postscript', |     '.dll'    : 'application/octet-stream', | ||||||
|     '.etx': 'text/x-setext', |     '.doc'    : 'application/msword', | ||||||
|     '.gif': 'image/gif', |     '.dot'    : 'application/msword', | ||||||
|     '.gtar': 'application/x-gtar', |     '.dvi'    : 'application/x-dvi', | ||||||
|     '.hdf': 'application/x-hdf', |     '.eml'    : 'message/rfc822', | ||||||
|     '.htm': 'text/html', |     '.eps'    : 'application/postscript', | ||||||
|     '.html': 'text/html', |     '.etx'    : 'text/x-setext', | ||||||
|     '.ief': 'image/ief', |     '.exe'    : 'application/octet-stream', | ||||||
|     '.jpe': 'image/jpeg', |     '.gif'    : 'image/gif', | ||||||
|     '.jpeg': 'image/jpeg', |     '.gtar'   : 'application/x-gtar', | ||||||
|     '.jpg': 'image/jpeg', |     '.h'      : 'text/plain', | ||||||
|     '.js': 'application/x-javascript', |     '.hdf'    : 'application/x-hdf', | ||||||
|     '.latex': 'application/x-latex', |     '.htm'    : 'text/html', | ||||||
|     '.man': 'application/x-troff-man', |     '.html'   : 'text/html', | ||||||
|     '.me': 'application/x-troff-me', |     '.ief'    : 'image/ief', | ||||||
|     '.mif': 'application/x-mif', |     '.jpe'    : 'image/jpeg', | ||||||
|     '.mov': 'video/quicktime', |     '.jpeg'   : 'image/jpeg', | ||||||
|     '.movie': 'video/x-sgi-movie', |     '.jpg'    : 'image/jpeg', | ||||||
|     '.mp2': 'audio/mpeg', |     '.js'     : 'application/x-javascript', | ||||||
|     '.mp3': 'audio/mpeg', |     '.ksh'    : 'text/plain', | ||||||
|     '.mpe': 'video/mpeg', |     '.latex'  : 'application/x-latex', | ||||||
|     '.mpeg': 'video/mpeg', |     '.m1v'    : 'video/mpeg', | ||||||
|     '.mpg': 'video/mpeg', |     '.man'    : 'application/x-troff-man', | ||||||
|     '.ms': 'application/x-troff-ms', |     '.me'     : 'application/x-troff-me', | ||||||
|     '.nc': 'application/x-netcdf', |     '.mht'    : 'message/rfc822', | ||||||
|     '.o': 'application/octet-stream', |     '.mhtml'  : 'message/rfc822', | ||||||
|     '.obj': 'application/octet-stream', |     '.mif'    : 'application/x-mif', | ||||||
|     '.oda': 'application/oda', |     '.mov'    : 'video/quicktime', | ||||||
|     '.pbm': 'image/x-portable-bitmap', |     '.movie'  : 'video/x-sgi-movie', | ||||||
|     '.pdf': 'application/pdf', |     '.mp2'    : 'audio/mpeg', | ||||||
|     '.pgm': 'image/x-portable-graymap', |     '.mp3'    : 'audio/mpeg', | ||||||
|     '.pnm': 'image/x-portable-anymap', |     '.mpa'    : 'video/mpeg', | ||||||
|     '.png': 'image/png', |     '.mpe'    : 'video/mpeg', | ||||||
|     '.ppm': 'image/x-portable-pixmap', |     '.mpeg'   : 'video/mpeg', | ||||||
|     '.ps': 'application/postscript', |     '.mpg'    : 'video/mpeg', | ||||||
|     '.py': 'text/x-python', |     '.ms'     : 'application/x-troff-ms', | ||||||
|     '.pyc': 'application/x-python-code', |     '.nc'     : 'application/x-netcdf', | ||||||
|     '.pyo': 'application/x-python-code', |     '.nws'    : 'message/rfc822', | ||||||
|     '.qt': 'video/quicktime', |     '.o'      : 'application/octet-stream', | ||||||
|     '.ras': 'image/x-cmu-raster', |     '.obj'    : 'application/octet-stream', | ||||||
|     '.rgb': 'image/x-rgb', |     '.oda'    : 'application/oda', | ||||||
|     '.rdf': 'application/xml', |     '.p12'    : 'application/x-pkcs12', | ||||||
|     '.roff': 'application/x-troff', |     '.p7c'    : 'application/pkcs7-mime', | ||||||
|     '.rtx': 'text/richtext', |     '.pbm'    : 'image/x-portable-bitmap', | ||||||
|     '.sgm': 'text/x-sgml', |     '.pdf'    : 'application/pdf', | ||||||
|     '.sgml': 'text/x-sgml', |     '.pfx'    : 'application/x-pkcs12', | ||||||
|     '.sh': 'application/x-sh', |     '.pgm'    : 'image/x-portable-graymap', | ||||||
|     '.shar': 'application/x-shar', |     '.pl'     : 'text/plain', | ||||||
|     '.snd': 'audio/basic', |     '.png'    : 'image/png', | ||||||
|     '.so': 'application/octet-stream', |     '.pnm'    : 'image/x-portable-anymap', | ||||||
|     '.src': 'application/x-wais-source', |     '.pot'    : 'application/vnd.ms-powerpoint', | ||||||
|  |     '.ppa'    : 'application/vnd.ms-powerpoint', | ||||||
|  |     '.ppm'    : 'image/x-portable-pixmap', | ||||||
|  |     '.pps'    : 'application/vnd.ms-powerpoint', | ||||||
|  |     '.ppt'    : 'application/vnd.ms-powerpoint', | ||||||
|  |     '.ps'     : 'application/postscript', | ||||||
|  |     '.pwz'    : 'application/vnd.ms-powerpoint', | ||||||
|  |     '.py'     : 'text/x-python', | ||||||
|  |     '.pyc'    : 'application/x-python-code', | ||||||
|  |     '.pyo'    : 'application/x-python-code', | ||||||
|  |     '.qt'     : 'video/quicktime', | ||||||
|  |     '.ra'     : 'audio/x-pn-realaudio', | ||||||
|  |     '.ram'    : 'application/x-pn-realaudio', | ||||||
|  |     '.ras'    : 'image/x-cmu-raster', | ||||||
|  |     '.rdf'    : 'application/xml', | ||||||
|  |     '.rgb'    : 'image/x-rgb', | ||||||
|  |     '.roff'   : 'application/x-troff', | ||||||
|  |     '.rtx'    : 'text/richtext', | ||||||
|  |     '.sgm'    : 'text/x-sgml', | ||||||
|  |     '.sgml'   : 'text/x-sgml', | ||||||
|  |     '.sh'     : 'application/x-sh', | ||||||
|  |     '.shar'   : 'application/x-shar', | ||||||
|  |     '.snd'    : 'audio/basic', | ||||||
|  |     '.so'     : 'application/octet-stream', | ||||||
|  |     '.src'    : 'application/x-wais-source', | ||||||
|     '.sv4cpio': 'application/x-sv4cpio', |     '.sv4cpio': 'application/x-sv4cpio', | ||||||
|     '.sv4crc': 'application/x-sv4crc', |     '.sv4crc' : 'application/x-sv4crc', | ||||||
|     '.t': 'application/x-troff', |     '.t'      : 'application/x-troff', | ||||||
|     '.tar': 'application/x-tar', |     '.tar'    : 'application/x-tar', | ||||||
|     '.tcl': 'application/x-tcl', |     '.tcl'    : 'application/x-tcl', | ||||||
|     '.tex': 'application/x-tex', |     '.tex'    : 'application/x-tex', | ||||||
|     '.texi': 'application/x-texinfo', |     '.texi'   : 'application/x-texinfo', | ||||||
|     '.texinfo': 'application/x-texinfo', |     '.texinfo': 'application/x-texinfo', | ||||||
|     '.tif': 'image/tiff', |     '.tif'    : 'image/tiff', | ||||||
|     '.tiff': 'image/tiff', |     '.tiff'   : 'image/tiff', | ||||||
|     '.tr': 'application/x-troff', |     '.tr'     : 'application/x-troff', | ||||||
|     '.tsv': 'text/tab-separated-values', |     '.tsv'    : 'text/tab-separated-values', | ||||||
|     '.txt': 'text/plain', |     '.txt'    : 'text/plain', | ||||||
|     '.ustar': 'application/x-ustar', |     '.ustar'  : 'application/x-ustar', | ||||||
|     '.wav': 'audio/x-wav', |     '.vcf'    : 'text/x-vcard', | ||||||
|     '.xbm': 'image/x-xbitmap', |     '.wav'    : 'audio/x-wav', | ||||||
|     '.xls': 'application/excel', |     '.wiz'    : 'application/msword', | ||||||
|     '.xml': 'text/xml', |     '.xbm'    : 'image/x-xbitmap', | ||||||
|     '.xsl': 'application/xml', |     '.xlb'    : 'application/vnd.ms-excel', | ||||||
|     '.xpm': 'image/x-xpixmap', |     # Duplicates :( | ||||||
|     '.xwd': 'image/x-xwindowdump', |     '.xls'    : 'application/excel', | ||||||
|     '.zip': 'application/zip', |     '.xls'    : 'application/vnd.ms-excel', | ||||||
|     '.mp3': 'audio/mpeg', |     '.xml'    : 'text/xml', | ||||||
|     '.ra': 'audio/x-pn-realaudio', |     '.xpm'    : 'image/x-xpixmap', | ||||||
|     '.pdf': 'application/pdf', |     '.xsl'    : 'application/xml', | ||||||
|     '.c': 'text/plain', |     '.xwd'    : 'image/x-xwindowdump', | ||||||
|     '.bat': 'text/plain', |     '.zip'    : 'application/zip', | ||||||
|     '.h': 'text/plain', |  | ||||||
|     '.pl': 'text/plain', |  | ||||||
|     '.ksh': 'text/plain', |  | ||||||
|     '.ram': 'application/x-pn-realaudio', |  | ||||||
|     '.cdf': 'application/x-cdf', |  | ||||||
|     '.doc': 'application/msword', |  | ||||||
|     '.dot': 'application/msword', |  | ||||||
|     '.wiz': 'application/msword', |  | ||||||
|     '.xlb': 'application/vnd.ms-excel', |  | ||||||
|     '.xls': 'application/vnd.ms-excel', |  | ||||||
|     '.ppa': 'application/vnd.ms-powerpoint', |  | ||||||
|     '.ppt': 'application/vnd.ms-powerpoint', |  | ||||||
|     '.pps': 'application/vnd.ms-powerpoint', |  | ||||||
|     '.pot': 'application/vnd.ms-powerpoint', |  | ||||||
|     '.pwz': 'application/vnd.ms-powerpoint', |  | ||||||
|     '.eml':   'message/rfc822', |  | ||||||
|     '.nws':   'message/rfc822', |  | ||||||
|     '.mht':   'message/rfc822', |  | ||||||
|     '.mhtml': 'message/rfc822', |  | ||||||
|     '.css': 'text/css', |  | ||||||
|     '.p7c': 'application/pkcs7-mime', |  | ||||||
|     '.p12': 'application/x-pkcs12', |  | ||||||
|     '.pfx': 'application/x-pkcs12', |  | ||||||
|     '.js':  'application/x-javascript', |  | ||||||
|     '.m1v': 'video/mpeg', |  | ||||||
|     '.mpa': 'video/mpeg', |  | ||||||
|     '.vcf': 'text/x-vcard', |  | ||||||
|     '.xml': 'text/xml', |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | # These are non-standard types, commonly found in the wild.  They will only | ||||||
|  | # match if strict=0 flag is given to the API methods. | ||||||
|  | 
 | ||||||
|  | # Please sort these too | ||||||
|  | common_types = { | ||||||
|  |     '.jpg' : 'image/jpg', | ||||||
|  |     '.mid' : 'audio/midi', | ||||||
|  |     '.midi': 'audio/midi', | ||||||
|  |     '.pct' : 'image/pict', | ||||||
|  |     '.pic' : 'image/pict', | ||||||
|  |     '.pict': 'image/pict', | ||||||
|  |     '.rtf' : 'application/rtf', | ||||||
|  |     '.xul' : 'text/xul' | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def usage(code, msg=''): | ||||||
|  |     print __doc__ | ||||||
|  |     if msg: print msg | ||||||
|  |     sys.exit(code) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     import sys |     import sys | ||||||
|     print guess_type(sys.argv[1]) |     import getopt | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|  |         opts, args = getopt.getopt(sys.argv[1:], 'hle', | ||||||
|  |                                    ['help', 'lenient', 'extension']) | ||||||
|  |     except getopt.error, msg: | ||||||
|  |         usage(1, msg) | ||||||
|  | 
 | ||||||
|  |     strict = 1 | ||||||
|  |     extension = 0 | ||||||
|  |     for opt, arg in opts: | ||||||
|  |         if opt in ('-h', '--help'): | ||||||
|  |             usage(0) | ||||||
|  |         elif opt in ('-l', '--lenient'): | ||||||
|  |             strict = 0 | ||||||
|  |         elif opt in ('-e', '--extension'): | ||||||
|  |             extension = 1 | ||||||
|  |     for gtype in args: | ||||||
|  |         if extension: | ||||||
|  |             guess = guess_extension(gtype, strict) | ||||||
|  |             if not guess: print "I don't know anything about type", gtype | ||||||
|  |             else: print guess | ||||||
|  |         else: | ||||||
|  |             guess, encoding = guess_type(gtype, strict) | ||||||
|  |             if not guess: print "I don't know anything about type", gtype | ||||||
|  |             else: print 'type:', guess, 'encoding:', encoding | ||||||
|  |  | ||||||
|  | @ -38,6 +38,18 @@ def test_file_parsing(self): | ||||||
|         self.assertEqual(self.db.guess_extension("x-application/x-unittest"), |         self.assertEqual(self.db.guess_extension("x-application/x-unittest"), | ||||||
|                          ".pyunit") |                          ".pyunit") | ||||||
| 
 | 
 | ||||||
|  |     def test_non_standard_types(self): | ||||||
|  |         # First try strict  | ||||||
|  |         self.assertEqual(self.db.guess_type('foo.xul', strict=1), | ||||||
|  |                          (None, None)) | ||||||
|  |         self.assertEqual(self.db.guess_extension('image/jpg', strict=1), | ||||||
|  |                          None) | ||||||
|  |         # And then non-strict | ||||||
|  |         self.assertEqual(self.db.guess_type('foo.xul', strict=0), | ||||||
|  |                          ('text/xul', None)) | ||||||
|  |         self.assertEqual(self.db.guess_extension('image/jpg', strict=0), | ||||||
|  |                          '.jpg') | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def test_main(): | def test_main(): | ||||||
|     test_support.run_unittest(MimeTypesTestCase) |     test_support.run_unittest(MimeTypesTestCase) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Barry Warsaw
						Barry Warsaw