| 
									
										
										
										
											1998-08-10 19:42:37 +00:00
										 |  |  | \section{\module{multifile} --- | 
					
						
							| 
									
										
										
										
											1999-04-23 14:46:18 +00:00
										 |  |  |          Support for files containing distinct parts} | 
					
						
							| 
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-04-23 14:46:18 +00:00
										 |  |  | \declaremodule{standard}{multifile} | 
					
						
							| 
									
										
										
										
											1998-08-07 15:55:14 +00:00
										 |  |  | \modulesynopsis{Support for reading files which contain distinct | 
					
						
							| 
									
										
										
										
											1999-04-23 14:46:18 +00:00
										 |  |  |                 parts, such as some MIME data.} | 
					
						
							|  |  |  | \sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com} | 
					
						
							| 
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | The \class{MultiFile} object enables you to treat sections of a text | 
					
						
							|  |  |  | file as file-like input objects, with \code{''} being returned by | 
					
						
							|  |  |  | \method{readline()} when a given delimiter pattern is encountered.  The | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | defaults of this class are designed to make it useful for parsing | 
					
						
							|  |  |  | MIME multipart messages, but by subclassing it and overriding methods  | 
					
						
							|  |  |  | it can be easily adapted for more general use. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | \begin{classdesc}{MultiFile}{fp\optional{, seekable}} | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | Create a multi-file.  You must instantiate this class with an input | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | object argument for the \class{MultiFile} instance to get lines from, | 
					
						
							|  |  |  | such as a file object returned by \function{open()}. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \class{MultiFile} only ever looks at the input object's | 
					
						
							|  |  |  | \method{readline()}, \method{seek()} and \method{tell()} methods, and | 
					
						
							|  |  |  | the latter two are only needed if you want random access to the | 
					
						
							|  |  |  | individual MIME parts. To use \class{MultiFile} on a non-seekable | 
					
						
							|  |  |  | stream object, set the optional \var{seekable} argument to false; this | 
					
						
							|  |  |  | will prevent using the input object's \method{seek()} and | 
					
						
							|  |  |  | \method{tell()} methods. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{classdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | It will be useful to know that in \class{MultiFile}'s view of the world, text | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | is composed of three kinds of lines: data, section-dividers, and | 
					
						
							|  |  |  | end-markers.  \class{MultiFile} is designed to support parsing of | 
					
						
							|  |  |  | messages that may have multiple nested message parts, each with its | 
					
						
							|  |  |  | own pattern for section-divider and end-marker lines. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-08-06 21:26:01 +00:00
										 |  |  | \begin{seealso} | 
					
						
							|  |  |  |   \seemodule{email}{Comprehensive email handling package; supersedes | 
					
						
							|  |  |  |                     the \module{multifile} module.} | 
					
						
							|  |  |  | \end{seealso} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-08-07 15:55:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | \subsection{MultiFile Objects \label{MultiFile-objects}} | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | A \class{MultiFile} instance has the following methods: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{methoddesc}{readline}{} | 
					
						
							|  |  |  | Read a line.  If the line is data (not a section-divider or end-marker | 
					
						
							|  |  |  | or real EOF) return it.  If the line matches the most-recently-stacked | 
					
						
							| 
									
										
										
										
											1998-06-30 16:35:25 +00:00
										 |  |  | boundary, return \code{''} and set \code{self.last} to 1 or 0 according as | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | the match is or is not an end-marker.  If the line matches any other | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | stacked boundary, raise an error.  On encountering end-of-file on the | 
					
						
							|  |  |  | underlying stream object, the method raises \exception{Error} unless | 
					
						
							|  |  |  | all boundaries have been popped. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{methoddesc}{readlines}{} | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | Return all lines remaining in this part as a list of strings. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | \begin{methoddesc}{read}{} | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | Read all lines, up to the next section.  Return them as a single | 
					
						
							|  |  |  | (multiline) string.  Note that this doesn't take a size argument! | 
					
						
							|  |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | \begin{methoddesc}{seek}{pos\optional{, whence}} | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | Seek.  Seek indices are relative to the start of the current section. | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | The \var{pos} and \var{whence} arguments are interpreted as for a file | 
					
						
							|  |  |  | seek. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | \begin{methoddesc}{tell}{} | 
					
						
							|  |  |  | Return the file position relative to the start of the current section. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-03-08 22:46:41 +00:00
										 |  |  | \begin{methoddesc}{next}{} | 
					
						
							|  |  |  | Skip lines to the next section (that is, read lines until a | 
					
						
							|  |  |  | section-divider or end-marker has been consumed).  Return true if | 
					
						
							|  |  |  | there is such a section, false if an end-marker is seen.  Re-enable | 
					
						
							|  |  |  | the most-recently-pushed boundary. | 
					
						
							|  |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \begin{methoddesc}{is_data}{str} | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | Return true if \var{str} is data and false if it might be a section | 
					
						
							| 
									
										
										
										
											1999-04-23 14:46:18 +00:00
										 |  |  | boundary.  As written, it tests for a prefix other than \code{'-}\code{-'} at | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | start of line (which all MIME boundaries have) but it is declared so | 
					
						
							|  |  |  | it can be overridden in derived classes. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Note that this test is intended as a fast guard for the real | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | boundary tests; if it always returns false it will merely slow | 
					
						
							|  |  |  | processing, not cause it to fail. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-03-08 22:46:41 +00:00
										 |  |  | \begin{methoddesc}{push}{str} | 
					
						
							|  |  |  | Push a boundary string.  When an appropriately decorated version of | 
					
						
							|  |  |  | this boundary is found as an input line, it will be interpreted as a | 
					
						
							|  |  |  | section-divider or end-marker.  All subsequent | 
					
						
							|  |  |  | reads will return the empty string to indicate end-of-file, until a | 
					
						
							|  |  |  | call to \method{pop()} removes the boundary or a \method{next()} call | 
					
						
							|  |  |  | reenables it. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | It is possible to push more than one boundary.  Encountering the | 
					
						
							|  |  |  | most-recently-pushed boundary will return EOF; encountering any other | 
					
						
							|  |  |  | boundary will raise an error. | 
					
						
							|  |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{methoddesc}{pop}{} | 
					
						
							|  |  |  | Pop a section boundary.  This boundary will no longer be interpreted | 
					
						
							|  |  |  | as EOF. | 
					
						
							|  |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \begin{methoddesc}{section_divider}{str} | 
					
						
							|  |  |  | Turn a boundary into a section-divider line.  By default, this | 
					
						
							| 
									
										
										
										
											1999-04-23 14:46:18 +00:00
										 |  |  | method prepends \code{'-}\code{-'} (which MIME section boundaries have) but | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | it is declared so it can be overridden in derived classes.  This | 
					
						
							|  |  |  | method need not append LF or CR-LF, as comparison with the result | 
					
						
							|  |  |  | ignores trailing whitespace.  | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{methoddesc}{end_marker}{str} | 
					
						
							|  |  |  | Turn a boundary string into an end-marker line.  By default, this | 
					
						
							| 
									
										
										
										
											1999-04-23 14:46:18 +00:00
										 |  |  | method prepends \code{'-}\code{-'} and appends \code{'-}\code{-'} (like a | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | MIME-multipart end-of-message marker) but it is declared so it can be | 
					
						
							|  |  |  | overridden in derived classes.  This method need not append LF or | 
					
						
							|  |  |  | CR-LF, as comparison with the result ignores trailing whitespace. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{methoddesc} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Finally, \class{MultiFile} instances have two public instance variables: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{memberdesc}{level} | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | Nesting depth of the current part. | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{memberdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{memberdesc}{last} | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | True if the last end-of-file was for an end-of-message marker.  | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{memberdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-02 19:36:50 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-08-07 15:55:14 +00:00
										 |  |  | \subsection{\class{MultiFile} Example \label{multifile-example}} | 
					
						
							| 
									
										
										
										
											2000-04-08 04:53:29 +00:00
										 |  |  | \sectionauthor{Skip Montanaro}{skip@mojam.com} | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | \begin{verbatim} | 
					
						
							| 
									
										
										
										
											2000-04-07 16:09:59 +00:00
										 |  |  | import mimetools | 
					
						
							| 
									
										
										
										
											2000-09-30 17:04:40 +00:00
										 |  |  | import multifile | 
					
						
							| 
									
										
										
										
											2000-04-07 16:09:59 +00:00
										 |  |  | import StringIO | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def extract_mime_part_matching(stream, mimetype): | 
					
						
							|  |  |  |     """Return the first element in a multipart MIME message on stream | 
					
						
							|  |  |  |     matching mimetype.""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     msg = mimetools.Message(stream) | 
					
						
							|  |  |  |     msgtype = msg.gettype() | 
					
						
							|  |  |  |     params = msg.getplist() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     data = StringIO.StringIO() | 
					
						
							|  |  |  |     if msgtype[:10] == "multipart/": | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         file = multifile.MultiFile(stream) | 
					
						
							|  |  |  |         file.push(msg.getparam("boundary")) | 
					
						
							|  |  |  |         while file.next(): | 
					
						
							|  |  |  |             submsg = mimetools.Message(file) | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 data = StringIO.StringIO() | 
					
						
							|  |  |  |                 mimetools.decode(file, data, submsg.getencoding()) | 
					
						
							|  |  |  |             except ValueError: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             if submsg.gettype() == mimetype: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         file.pop() | 
					
						
							|  |  |  |     return data.getvalue() | 
					
						
							| 
									
										
										
										
											1998-06-28 17:55:53 +00:00
										 |  |  | \end{verbatim} |