mirror of
				https://github.com/python/cpython.git
				synced 2025-10-23 01:43:53 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			136 lines
		
	
	
	
		
			5.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			136 lines
		
	
	
	
		
			5.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (C) 2001-2007 Python Software Foundation
 | ||
| # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
 | ||
| # Contact: email-sig@python.org
 | ||
| 
 | ||
| """A parser of RFC 2822 and MIME email messages."""
 | ||
| 
 | ||
# Public names exported by ``from email.parser import *``.  BytesParser is
# defined in this module and is part of its public API, so it is listed too.
__all__ = ['Parser', 'HeaderParser', 'BytesParser']
 | ||
| 
 | ||
| import warnings
 | ||
| from io import StringIO, TextIOWrapper
 | ||
| 
 | ||
| from email.feedparser import FeedParser
 | ||
| from email.message import Message
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
class Parser:
    """Parser of RFC 2822 and MIME email messages supplied as text.

    parse() creates a new FeedParser for every call, passing it the message
    class chosen at construction time.
    """

    def __init__(self, *args, **kws):
        """Configure the parser.

        The text later given to parse() or parsestr() must be formatted as a
        block of RFC 2822 headers and header continuation lines, optionally
        preceded by a `Unix-from' header.  The header block is terminated
        either by the end of the input or by a blank line.

        Arguments may be given positionally (in this order) or by keyword:

        _class -- the class to instantiate for new message objects when they
            must be created.  It must have a constructor that can take zero
            arguments.  Defaults to email.message.Message.
        strict -- deprecated and ignored; supplying it only triggers a
            DeprecationWarning.

        Raises TypeError if an argument is given both positionally and by
        keyword, if more than two positional arguments are given, or if any
        other keyword argument is present.
        """
        supplied = len(args)
        # Fold positional arguments into the keyword dict so that the rest of
        # the method only has to deal with one representation.
        if supplied:
            if '_class' in kws:
                raise TypeError("Multiple values for keyword arg '_class'")
            kws['_class'] = args[0]
        if supplied == 2:
            if 'strict' in kws:
                raise TypeError("Multiple values for keyword arg 'strict'")
            kws['strict'] = args[1]
        elif supplied > 2:
            raise TypeError('Too many arguments')

        self._class = kws.pop('_class', Message)

        if 'strict' in kws:
            # stacklevel=2 attributes the warning to the code that called the
            # constructor rather than to this method.
            warnings.warn("'strict' argument is deprecated (and ignored)",
                          DeprecationWarning, 2)
            kws.pop('strict')

        # Anything still left over was not a recognised option.
        if kws:
            raise TypeError('Unexpected keyword arguments')

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        fp is read to exhaustion through its read() method, and the root of
        the resulting message structure is returned.  When the optional
        headersonly flag is true, parsing stops after the headers have been
        read; the default (False) parses the entire contents of the file.
        """
        feeder = FeedParser(self._class)
        if headersonly:
            feeder._set_headersonly()
        # Feed the input in 8192-character chunks; an empty read ends the loop.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        The string is wrapped in a StringIO and handed to parse(); the root
        of the message structure is returned.  The optional headersonly flag
        has the same meaning as for parse() and defaults to False.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
class HeaderParser(Parser):
    """Parser that always runs in headers-only mode.

    Both methods keep a headersonly parameter so their signatures line up
    with Parser, but the value passed in is ignored: the underlying Parser
    method is always invoked with headersonly=True.
    """

    def parse(self, fp, headersonly=True):
        """Parse the file object fp in headers-only mode.

        The headersonly argument is accepted but ignored.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the string text in headers-only mode.

        The headersonly argument is accepted but ignored.
        """
        return Parser.parsestr(self, text, headersonly=True)
 | ||
| 
 | ||
| 
 | ||
class BytesParser:
    """Parser of binary RFC 2822 and MIME email messages.

    Input bytes are decoded as ASCII with the 'surrogateescape' error handler
    and the resulting text is handed to a Parser instance stored on
    self.parser.
    """

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are forwarded unchanged to
        Parser.  _class is the class to instantiate for new message objects
        when they must be created.  This class must have a constructor that
        can take zero arguments.  Default is email.message.Message.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        fp is wrapped in a TextIOWrapper for decoding.  The wrapper is
        detached again before returning (also when parsing raises), so fp
        itself is left open and closing it remains the caller's job.
        """
        wrapper = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            # Detach instead of closing: closing the wrapper would also close
            # the caller's underlying binary file, which this method does not
            # own.
            wrapper.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
 | 
