| 
									
										
										
										
											2006-03-18 15:41:53 +00:00
										 |  |  |  | # Copyright (C) 2001-2006 Python Software Foundation | 
					
						
							| 
									
										
										
										
											2004-05-09 03:46:42 +00:00
										 |  |  |  | # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter | 
					
						
							|  |  |  |  | # Contact: email-sig@python.org | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-05-09 03:46:42 +00:00
										 |  |  |  | """A parser of RFC 2822 and MIME email messages.""" | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-18 15:41:53 +00:00
										 |  |  |  | __all__ = ['Parser', 'HeaderParser'] | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-10-03 03:16:19 +00:00
										 |  |  |  | import warnings | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | from cStringIO import StringIO | 
					
						
							| 
									
										
										
										
											2006-03-18 15:41:53 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | from email.feedparser import FeedParser | 
					
						
							|  |  |  |  | from email.message import Message | 
					
						
							| 
									
										
										
										
											2002-09-28 20:44:58 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-10-04 17:05:11 +00:00
										 |  |  |  |  | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | class Parser: | 
					
						
							| 
									
										
										
										
											2004-10-03 03:16:19 +00:00
										 |  |  |  |     def __init__(self, *args, **kws): | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  |         """Parser of RFC 2822 and MIME email messages.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         Creates an in-memory object tree representing the email message, which | 
					
						
							|  |  |  |  |         can then be manipulated and turned over to a Generator to return the | 
					
						
							|  |  |  |  |         textual representation of the message. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         The string must be formatted as a block of RFC 2822 headers and header | 
					
						
							|  |  |  |  |         continuation lines, optionally preceeded by a `Unix-from' header.  The | 
					
						
							|  |  |  |  |         header block is terminated either by the end of the string or by a | 
					
						
							|  |  |  |  |         blank line. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         _class is the class to instantiate for new message objects when they | 
					
						
							|  |  |  |  |         must be created.  This class must have a constructor that can take | 
					
						
							|  |  |  |  |         zero arguments.  Default is Message.Message. | 
					
						
							|  |  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2004-10-03 03:16:19 +00:00
										 |  |  |  |         if len(args) >= 1: | 
					
						
							|  |  |  |  |             if '_class' in kws: | 
					
						
							|  |  |  |  |                 raise TypeError("Multiple values for keyword arg '_class'") | 
					
						
							|  |  |  |  |             kws['_class'] = args[0] | 
					
						
							|  |  |  |  |         if len(args) == 2: | 
					
						
							|  |  |  |  |             if 'strict' in kws: | 
					
						
							|  |  |  |  |                 raise TypeError("Multiple values for keyword arg 'strict'") | 
					
						
							|  |  |  |  |             kws['strict'] = args[1] | 
					
						
							|  |  |  |  |         if len(args) > 2: | 
					
						
							|  |  |  |  |             raise TypeError('Too many arguments') | 
					
						
							|  |  |  |  |         if '_class' in kws: | 
					
						
							|  |  |  |  |             self._class = kws['_class'] | 
					
						
							|  |  |  |  |             del kws['_class'] | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             self._class = Message | 
					
						
							|  |  |  |  |         if 'strict' in kws: | 
					
						
							|  |  |  |  |             warnings.warn("'strict' argument is deprecated (and ignored)", | 
					
						
							|  |  |  |  |                           DeprecationWarning, 2) | 
					
						
							|  |  |  |  |             del kws['strict'] | 
					
						
							|  |  |  |  |         if kws: | 
					
						
							|  |  |  |  |             raise TypeError('Unexpected keyword arguments') | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-28 20:44:58 +00:00
										 |  |  |  |     def parse(self, fp, headersonly=False): | 
					
						
							| 
									
										
										
										
											2002-09-30 20:07:22 +00:00
										 |  |  |  |         """Create a message structure from the data in a file.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         Reads all the data from the file and returns the root of the message | 
					
						
							|  |  |  |  |         structure.  Optional headersonly is a flag specifying whether to stop | 
					
						
							|  |  |  |  |         parsing after reading the headers or not.  The default is False, | 
					
						
							|  |  |  |  |         meaning it parses the entire contents of the file. | 
					
						
							|  |  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2004-05-09 03:46:42 +00:00
										 |  |  |  |         feedparser = FeedParser(self._class) | 
					
						
							|  |  |  |  |         if headersonly: | 
					
						
							|  |  |  |  |             feedparser._set_headersonly() | 
					
						
							|  |  |  |  |         while True: | 
					
						
							|  |  |  |  |             data = fp.read(8192) | 
					
						
							|  |  |  |  |             if not data: | 
					
						
							|  |  |  |  |                 break | 
					
						
							|  |  |  |  |             feedparser.feed(data) | 
					
						
							|  |  |  |  |         return feedparser.close() | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-28 20:44:58 +00:00
										 |  |  |  |     def parsestr(self, text, headersonly=False): | 
					
						
							| 
									
										
										
										
											2002-09-30 20:07:22 +00:00
										 |  |  |  |         """Create a message structure from a string.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         Returns the root of the message structure.  Optional headersonly is a | 
					
						
							|  |  |  |  |         flag specifying whether to stop parsing after reading the headers or | 
					
						
							|  |  |  |  |         not.  The default is False, meaning it parses the entire contents of | 
					
						
							|  |  |  |  |         the file. | 
					
						
							|  |  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2002-07-09 02:50:02 +00:00
										 |  |  |  |         return self.parse(StringIO(text), headersonly=headersonly) | 
					
						
							| 
									
										
										
										
											2001-09-23 03:17:28 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-10-11 15:43:00 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |  | 
					
						
							|  |  |  |  | class HeaderParser(Parser): | 
					
						
							| 
									
										
										
										
											2004-05-09 03:46:42 +00:00
										 |  |  |  |     def parse(self, fp, headersonly=True): | 
					
						
							|  |  |  |  |         return Parser.parse(self, fp, True) | 
					
						
							| 
									
										
										
										
											2001-10-11 15:43:00 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-05-09 03:46:42 +00:00
										 |  |  |  |     def parsestr(self, text, headersonly=True): | 
					
						
							|  |  |  |  |         return Parser.parsestr(self, text, True) |