| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  | # Copyright (C) 2001-2007 Python Software Foundation | 
					
						
							|  |  |  | # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter | 
					
						
							|  |  |  | # Contact: email-sig@python.org | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """A parser of RFC 2822 and MIME email messages.""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-03-15 21:00:48 -04:00
										 |  |  | __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', | 
					
						
							|  |  |  |            'FeedParser', 'BytesFeedParser'] | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-08 15:55:28 +00:00
										 |  |  | from io import StringIO, TextIOWrapper | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-03-15 20:38:15 -04:00
										 |  |  | from email.feedparser import FeedParser, BytesFeedParser | 
					
						
							| 
									
										
										
										
											2012-05-25 15:01:48 -04:00
										 |  |  | from email._policybase import compat32 | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Parser: | 
					
						
							| 
									
										
										
										
											2014-02-07 10:44:16 -05:00
										 |  |  |     def __init__(self, _class=None, *, policy=compat32): | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |         """Parser of RFC 2822 and MIME email messages.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Creates an in-memory object tree representing the email message, which | 
					
						
							|  |  |  |         can then be manipulated and turned over to a Generator to return the | 
					
						
							|  |  |  |         textual representation of the message. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         The string must be formatted as a block of RFC 2822 headers and header | 
					
						
							| 
									
										
										
										
											2016-05-26 05:35:26 +00:00
										 |  |  |         continuation lines, optionally preceded by a `Unix-from' header.  The | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |         header block is terminated either by the end of the string or by a | 
					
						
							|  |  |  |         blank line. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         _class is the class to instantiate for new message objects when they | 
					
						
							|  |  |  |         must be created.  This class must have a constructor that can take | 
					
						
							|  |  |  |         zero arguments.  Default is Message.Message. | 
					
						
							| 
									
										
										
										
											2011-04-18 13:59:37 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         The policy keyword specifies a policy object that controls a number of | 
					
						
							|  |  |  |         aspects of the parser's operation.  The default policy maintains | 
					
						
							|  |  |  |         backward compatibility. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2011-03-29 11:32:35 -04:00
										 |  |  |         self._class = _class | 
					
						
							| 
									
										
										
										
											2011-04-18 13:59:37 -04:00
										 |  |  |         self.policy = policy | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def parse(self, fp, headersonly=False): | 
					
						
							|  |  |  |         """Create a message structure from the data in a file.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Reads all the data from the file and returns the root of the message | 
					
						
							|  |  |  |         structure.  Optional headersonly is a flag specifying whether to stop | 
					
						
							|  |  |  |         parsing after reading the headers or not.  The default is False, | 
					
						
							|  |  |  |         meaning it parses the entire contents of the file. | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2011-04-18 13:59:37 -04:00
										 |  |  |         feedparser = FeedParser(self._class, policy=self.policy) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |         if headersonly: | 
					
						
							|  |  |  |             feedparser._set_headersonly() | 
					
						
							| 
									
										
										
										
											2022-11-26 16:33:25 -06:00
										 |  |  |         while data := fp.read(8192): | 
					
						
							| 
									
										
										
										
											2008-06-12 04:06:45 +00:00
										 |  |  |             feedparser.feed(data) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |         return feedparser.close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def parsestr(self, text, headersonly=False): | 
					
						
							|  |  |  |         """Create a message structure from a string.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Returns the root of the message structure.  Optional headersonly is a | 
					
						
							|  |  |  |         flag specifying whether to stop parsing after reading the headers or | 
					
						
							|  |  |  |         not.  The default is False, meaning it parses the entire contents of | 
					
						
							|  |  |  |         the file. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         return self.parse(StringIO(text), headersonly=headersonly) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class HeaderParser(Parser): | 
					
						
							|  |  |  |     def parse(self, fp, headersonly=True): | 
					
						
							|  |  |  |         return Parser.parse(self, fp, True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def parsestr(self, text, headersonly=True): | 
					
						
							|  |  |  |         return Parser.parsestr(self, text, True) | 
					
						
							| 
									
										
										
										
											2010-10-08 15:55:28 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-08 13:58:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-08 15:55:28 +00:00
										 |  |  | class BytesParser: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, *args, **kw): | 
					
						
							|  |  |  |         """Parser of binary RFC 2822 and MIME email messages.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Creates an in-memory object tree representing the email message, which | 
					
						
							|  |  |  |         can then be manipulated and turned over to a Generator to return the | 
					
						
							|  |  |  |         textual representation of the message. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         The input must be formatted as a block of RFC 2822 headers and header | 
					
						
							| 
									
										
										
										
											2016-05-26 05:35:26 +00:00
										 |  |  |         continuation lines, optionally preceded by a `Unix-from' header.  The | 
					
						
							| 
									
										
										
										
											2010-10-08 15:55:28 +00:00
										 |  |  |         header block is terminated either by the end of the input or by a | 
					
						
							|  |  |  |         blank line. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         _class is the class to instantiate for new message objects when they | 
					
						
							|  |  |  |         must be created.  This class must have a constructor that can take | 
					
						
							|  |  |  |         zero arguments.  Default is Message.Message. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         self.parser = Parser(*args, **kw) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def parse(self, fp, headersonly=False): | 
					
						
							|  |  |  |         """Create a message structure from the data in a binary file.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Reads all the data from the file and returns the root of the message | 
					
						
							|  |  |  |         structure.  Optional headersonly is a flag specifying whether to stop | 
					
						
							|  |  |  |         parsing after reading the headers or not.  The default is False, | 
					
						
							|  |  |  |         meaning it parses the entire contents of the file. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape') | 
					
						
							| 
									
										
										
										
											2014-06-26 13:31:43 -04:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2010-10-29 23:08:13 +00:00
										 |  |  |             return self.parser.parse(fp, headersonly) | 
					
						
							| 
									
										
										
										
											2014-06-26 13:31:43 -04:00
										 |  |  |         finally: | 
					
						
							|  |  |  |             fp.detach() | 
					
						
							| 
									
										
										
										
											2010-10-08 15:55:28 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def parsebytes(self, text, headersonly=False): | 
					
						
							|  |  |  |         """Create a message structure from a byte string.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Returns the root of the message structure.  Optional headersonly is a | 
					
						
							|  |  |  |         flag specifying whether to stop parsing after reading the headers or | 
					
						
							|  |  |  |         not.  The default is False, meaning it parses the entire contents of | 
					
						
							|  |  |  |         the file. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         text = text.decode('ASCII', errors='surrogateescape') | 
					
						
							|  |  |  |         return self.parser.parsestr(text, headersonly) | 
					
						
							| 
									
										
										
										
											2011-04-13 16:46:05 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class BytesHeaderParser(BytesParser): | 
					
						
							|  |  |  |     def parse(self, fp, headersonly=True): | 
					
						
							|  |  |  |         return BytesParser.parse(self, fp, headersonly=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def parsebytes(self, text, headersonly=True): | 
					
						
							|  |  |  |         return BytesParser.parsebytes(self, text, headersonly=True) |