mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			224 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			224 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""This will be the home for the policy that hooks in the new
 | 
						|
code that adds all the email6 features.
 | 
						|
"""
 | 
						|
 | 
						|
import re
 | 
						|
import sys
 | 
						|
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
 | 
						|
from email.utils import _has_surrogates
 | 
						|
from email.headerregistry import HeaderRegistry as HeaderRegistry
 | 
						|
from email.contentmanager import raw_data_manager
 | 
						|
from email.message import EmailMessage
 | 
						|
 | 
						|
# Public API of this module.  SMTPUTF8 is a module-level policy instance
# defined alongside SMTP and HTTP at the bottom of the file, so it is exported
# here as well; otherwise ``from email.policy import *`` silently omits it.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
 | 
						|
 | 
						|
# Matches exactly one line break: LF, CRLF, or a lone CR.  CRLF is consumed as
# a single separator so that splitting a CRLF-folded value does not produce an
# empty string between the CR and the LF.
linesep_splitter = re.compile(r'\n|\r\n?')

@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).  The base class
        # is bypassed via object.__setattr__, the same way this class has
        # always installed the per-instance factory.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines()) > 1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            # The check is deliberately left as-is: which inputs raise is
            # visible to callers.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CR, LF and CRLF only.  If the
        value is not to be refolded, the lines are rejoined using the linesep
        from the policy and returned.  The exception is lines containing
        non-ascii binary data.  In that case the value is refolded regardless
        of the refold_source setting, which causes the binary data to be CTE
        encoded using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold() and fold_binary().

        name is the header field name and value is either a header object
        (anything with a 'name' attribute) or a source string.  Header objects
        are folded by their own fold method.  Source strings are either
        refolded through header_factory or re-emitted with their original
        line structure, joined with the policy's linesep.

        If refold_binary is true, a source value containing surrogateescaped
        bytes is refolded even when refold_source would not require it.

        Returns the folded header as a str, including the field name and a
        terminating linesep.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A max_line_length of 0 or None means "no limit".
        maxlen = self.max_line_length or sys.maxsize
        # str.splitlines() must not be used here: it also breaks on VT, FF,
        # NEL, U+2028 and friends, which would turn such a character inside a
        # header value into a real line break in the output.
        lines = linesep_splitter.split(value)
        if not lines[-1]:
            # Mirror splitlines(): a trailing line break (or an empty value)
            # does not contribute an extra empty final line.
            lines.pop()
        # The first line shares its row with "name: ", hence the +2.
        first_too_long = bool(lines) and len(lines[0]) + len(name) + 2 > maxlen
        refold = self.refold_source == 'all' or (
            self.refold_source == 'long' and (
                first_too_long or any(len(x) > maxlen for x in lines[1:])))
        if refold or (refold_binary and _has_surrogates(value)):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
 | 
						|
 | 
						|
 | 
						|
# Ready-made policy instances.
#
# 'default' is a plain EmailPolicy.  Its per-instance header_factory (installed
# by EmailPolicy.__init__) is removed so that it, and every policy cloned from
# it below, falls back to the class-level default header_factory.
default = EmailPolicy()
del default.header_factory

# Same as 'default', but defects are raised instead of being recorded.
strict = default.clone(raise_on_defect=True)

# 'default' with CRLF line endings.
SMTP = default.clone(linesep='\r\n')

# CRLF line endings and no line-length limit.
HTTP = SMTP.clone(max_line_length=None)

# SMTP with headers serialized as utf8.
SMTPUTF8 = SMTP.clone(utf8=True)
 |