mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			861 lines
		
	
	
	
		
			33 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			861 lines
		
	
	
	
		
			33 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# Copyright (C) 2001-2007 Python Software Foundation
 | 
						||
# Author: Barry Warsaw
 | 
						||
# Contact: email-sig@python.org
 | 
						||
 | 
						||
"""Basic message object for the email package object model."""
 | 
						||
 | 
						||
__all__ = ['Message']
 | 
						||
 | 
						||
import re
 | 
						||
import uu
 | 
						||
import base64
 | 
						||
import binascii
 | 
						||
import warnings
 | 
						||
from io import BytesIO, StringIO
 | 
						||
 | 
						||
# Intrapackage imports
 | 
						||
from email import utils
 | 
						||
from email import errors
 | 
						||
from email import header
 | 
						||
from email import charset as _charset
 | 
						||
Charset = _charset.Charset
 | 
						||
 | 
						||
SEMISPACE = '; '
 | 
						||
 | 
						||
# Regular expression that matches `special' characters in parameters, the
 | 
						||
# existence of which force quoting of the parameter value.
 | 
						||
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
 | 
						||
 | 
						||
# How to figure out if we are processing strings that come from a byte
 | 
						||
# source with undecodable characters.
 | 
						||
# The \A and \Z anchors are written with doubled backslashes so that they
# reach the regex engine unchanged without relying on Python passing unknown
# string escapes through (which triggers an invalid-escape warning).  The
# \udXXX escapes are still resolved by the string literal itself.
_has_surrogates = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search
 | 
						||
 | 
						||
 | 
						||
# Helper functions
 | 
						||
def _sanitize_header(name, value):
    """Return value in a form that is safe to hand back to a caller.

    A non-string value is assumed to already be a header object and is
    returned untouched.  A string that contains surrogates is wrapped in a
    Header using the unknown-8bit charset, so the original bytes end up as
    encoded words.  Any other string is returned as-is.
    """
    if isinstance(value, str) and _has_surrogates(value):
        return header.Header(value, charset=_charset.UNKNOWN8BIT,
                             header_name=name)
    return value
 | 
						||
 | 
						||
def _splitparam(param):
    """Split a header value at its first ';'.

    Returns a 2-tuple (main value, remaining parameter text), both stripped
    of surrounding whitespace.  The second item is None when the value
    contains no ';' at all.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full fledged parser.
    """
    # RDM: param might be a Header instance; for now just stringify it.
    head, found, tail = str(param).partition(';')
    main = head.strip()
    rest = tail.strip() if found else None
    return main, rest
 | 
						||
 | 
						||
def _formatparam(param, value=None, quote=True):
    """Format and return a key=value pair for use in a header.

    With no value (None or empty) just the bare parameter name is returned.

    A three tuple (charset, language, value) is encoded according to RFC
    2231 rules.  A string containing non-ascii characters is likewise
    encoded according to RFC 2231 rules, using the utf-8 charset and a null
    language.  RFC 2231 encoded values are never quoted, per RFC.

    Any other value is quoted when quote is true or when it contains one of
    the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # (charset, language, value); charset is a string here, not a
        # Charset instance.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
 | 
						||
 | 
						||
def _parseparam(s):
    """Split a header value into a list of its ';'-separated pieces.

    A ';' that falls inside a double-quoted string does not end a piece.
    For pieces of the form key=value the key is stripped and lower-cased and
    the value is stripped; every piece is stripped of surrounding
    whitespace.  At least one piece is always returned.
    """
    # RDM: s might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # Keep looking for the next ';' while the number of unescaped double
        # quotes in front of the candidate is odd, i.e. we are still inside
        # a quoted string.
        while cut > 0 and (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut)) % 2:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        piece = remaining[:cut]
        key, equals, val = piece.partition('=')
        if equals:
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
        remaining = remaining[cut:]
    return pieces
 | 
						||
 | 
						||
 | 
						||
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # (charset, language, value): only the value part is unquoted.
    return value[0], value[1], utils.unquote(value[2])
 | 
						||
 | 
						||
 | 
						||
 | 
						||
class Message:
 | 
						||
    """Basic message object.
 | 
						||
 | 
						||
    A message object is defined as something that has a bunch of RFC 2822
 | 
						||
    headers and a payload.  It may optionally have an envelope header
 | 
						||
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
 | 
						||
    multipart or a message/rfc822), then the payload is a list of Message
 | 
						||
    objects, otherwise it is a string.
 | 
						||
 | 
						||
    Message objects implement part of the `mapping' interface, which assumes
 | 
						||
    there is exactly one occurrence of the header per message.  Some headers
 | 
						||
    do in fact appear multiple times (e.g. Received) and for those headers,
 | 
						||
    you must use the explicit API to set or get all the headers.  Not all of
 | 
						||
    the mapping methods are implemented.
 | 
						||
    """
 | 
						||
    def __init__(self):
 | 
						||
        self._headers = []
 | 
						||
        self._unixfrom = None
 | 
						||
        self._payload = None
 | 
						||
        self._charset = None
 | 
						||
        # Defaults for multipart messages
 | 
						||
        self.preamble = self.epilogue = None
 | 
						||
        self.defects = []
 | 
						||
        # Default content type
 | 
						||
        self._default_type = 'text/plain'
 | 
						||
 | 
						||
    def __str__(self):
        """Return the whole message flattened to a string.

        This is as_string() called with its default arguments.
        """
        text = self.as_string()
        return text
 | 
						||
 | 
						||
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.

        Optional unixfrom, when True, means include the Unix From_ envelope
        header.  Optional maxheaderlen is handed to the Generator unchanged.

        This is a convenience method and may not generate the message exactly
        as you intend.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        buffer = StringIO()
        generator = Generator(buffer, mangle_from_=False,
                              maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom)
        return buffer.getvalue()
 | 
						||
 | 
						||
    def is_multipart(self):
 | 
						||
        """Return True if the message consists of multiple parts."""
 | 
						||
        return isinstance(self._payload, list)
 | 
						||
 | 
						||
    #
 | 
						||
    # Unix From_ line
 | 
						||
    #
 | 
						||
    def set_unixfrom(self, unixfrom):
 | 
						||
        self._unixfrom = unixfrom
 | 
						||
 | 
						||
    def get_unixfrom(self):
 | 
						||
        return self._unixfrom
 | 
						||
 | 
						||
    #
 | 
						||
    # Payload manipulation.
 | 
						||
    #
 | 
						||
    def attach(self, payload):
 | 
						||
        """Add the given payload to the current payload.
 | 
						||
 | 
						||
        The current payload will always be a list of objects after this method
 | 
						||
        is called.  If you want to set the payload to a scalar object, use
 | 
						||
        set_payload() instead.
 | 
						||
        """
 | 
						||
        if self._payload is None:
 | 
						||
            self._payload = [payload]
 | 
						||
        else:
 | 
						||
            self._payload.append(payload)
 | 
						||
 | 
						||
    def get_payload(self, i=None, decode=False):
 | 
						||
        """Return a reference to the payload.
 | 
						||
 | 
						||
        The payload will either be a list object or a string.  If you mutate
 | 
						||
        the list object, you modify the message's payload in place.  Optional
 | 
						||
        i returns that index into the payload.
 | 
						||
 | 
						||
        Optional decode is a flag indicating whether the payload should be
 | 
						||
        decoded or not, according to the Content-Transfer-Encoding header
 | 
						||
        (default is False).
 | 
						||
 | 
						||
        When True and the message is not a multipart, the payload will be
 | 
						||
        decoded if this header's value is `quoted-printable' or `base64'.  If
 | 
						||
        some other encoding is used, or the header is missing, or if the
 | 
						||
        payload has bogus data (i.e. bogus base64 or uuencoded data), the
 | 
						||
        payload is returned as-is.
 | 
						||
 | 
						||
        If the message is a multipart and the decode flag is True, then None
 | 
						||
        is returned.
 | 
						||
        """
 | 
						||
        # Here is the logic table for this code, based on the email5.0.0 code:
 | 
						||
        #   i     decode  is_multipart  result
 | 
						||
        # ------  ------  ------------  ------------------------------
 | 
						||
        #  None   True    True          None
 | 
						||
        #   i     True    True          None
 | 
						||
        #  None   False   True          _payload (a list)
 | 
						||
        #   i     False   True          _payload element i (a Message)
 | 
						||
        #   i     False   False         error (not a list)
 | 
						||
        #   i     True    False         error (not a list)
 | 
						||
        #  None   False   False         _payload
 | 
						||
        #  None   True    False         _payload decoded (bytes)
 | 
						||
        # Note that Barry planned to factor out the 'decode' case, but that
 | 
						||
        # isn't so easy now that we handle the 8 bit data, which needs to be
 | 
						||
        # converted in both the decode and non-decode path.
 | 
						||
        if self.is_multipart():
 | 
						||
            if decode:
 | 
						||
                return None
 | 
						||
            if i is None:
 | 
						||
                return self._payload
 | 
						||
            else:
 | 
						||
                return self._payload[i]
 | 
						||
        # For backward compatibility, Use isinstance and this error message
 | 
						||
        # instead of the more logical is_multipart test.
 | 
						||
        if i is not None and not isinstance(self._payload, list):
 | 
						||
            raise TypeError('Expected list, got %s' % type(self._payload))
 | 
						||
        payload = self._payload
 | 
						||
        # cte might be a Header, so for now stringify it.
 | 
						||
        cte = str(self.get('content-transfer-encoding', '')).lower()
 | 
						||
        # payload may be bytes here.
 | 
						||
        if isinstance(payload, str):
 | 
						||
            if _has_surrogates(payload):
 | 
						||
                bpayload = payload.encode('ascii', 'surrogateescape')
 | 
						||
                if not decode:
 | 
						||
                    try:
 | 
						||
                        payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
 | 
						||
                    except LookupError:
 | 
						||
                        payload = bpayload.decode('ascii', 'replace')
 | 
						||
            elif decode:
 | 
						||
                try:
 | 
						||
                    bpayload = payload.encode('ascii')
 | 
						||
                except UnicodeError:
 | 
						||
                    # This won't happen for RFC compliant messages (messages
 | 
						||
                    # containing only ASCII codepoints in the unicode input).
 | 
						||
                    # If it does happen, turn the string into bytes in a way
 | 
						||
                    # guaranteed not to fail.
 | 
						||
                    bpayload = payload.encode('raw-unicode-escape')
 | 
						||
        if not decode:
 | 
						||
            return payload
 | 
						||
        if cte == 'quoted-printable':
 | 
						||
            return utils._qdecode(bpayload)
 | 
						||
        elif cte == 'base64':
 | 
						||
            try:
 | 
						||
                return base64.b64decode(bpayload)
 | 
						||
            except binascii.Error:
 | 
						||
                # Incorrect padding
 | 
						||
                return bpayload
 | 
						||
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
 | 
						||
            in_file = BytesIO(bpayload)
 | 
						||
            out_file = BytesIO()
 | 
						||
            try:
 | 
						||
                uu.decode(in_file, out_file, quiet=True)
 | 
						||
                return out_file.getvalue()
 | 
						||
            except uu.Error:
 | 
						||
                # Some decoding problem
 | 
						||
                return bpayload
 | 
						||
        if isinstance(payload, str):
 | 
						||
            return bpayload
 | 
						||
        return payload
 | 
						||
 | 
						||
    def set_payload(self, payload, charset=None):
 | 
						||
        """Set the payload to the given value.
 | 
						||
 | 
						||
        Optional charset sets the message's default character set.  See
 | 
						||
        set_charset() for details.
 | 
						||
        """
 | 
						||
        self._payload = payload
 | 
						||
        if charset is not None:
 | 
						||
            self.set_charset(charset)
 | 
						||
 | 
						||
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  Anything that is not a Charset instance is handed to the
        Charset constructor.  If charset is None, the charset parameter is
        removed from the Content-Type field and nothing else is changed.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  MIME headers (MIME-Version, Content-Type,
        Content-Transfer-Encoding) are added when they are not already
        present, and the payload is body-encoded where the charset calls
        for it.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' in self:
            self.set_param('charset', charset.get_output_charset())
        else:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' in self:
            return
        body_encoding = charset.get_body_encoding()
        try:
            # The body encoding may be a callable that updates the message
            # itself ...
            body_encoding(self)
        except TypeError:
            # ... otherwise encode the payload here and record the encoding
            # in the header.
            self._payload = charset.body_encode(self._payload)
            self.add_header('Content-Transfer-Encoding', body_encoding)
 | 
						||
 | 
						||
    def get_charset(self):
 | 
						||
        """Return the Charset instance associated with the message's payload.
 | 
						||
        """
 | 
						||
        return self._charset
 | 
						||
 | 
						||
    #
 | 
						||
    # MAPPING INTERFACE (partial)
 | 
						||
    #
 | 
						||
    def __len__(self):
 | 
						||
        """Return the total number of headers, including duplicates."""
 | 
						||
        return len(self._headers)
 | 
						||
 | 
						||
    def __getitem__(self, name):
 | 
						||
        """Get a header value.
 | 
						||
 | 
						||
        Return None if the header is missing instead of raising an exception.
 | 
						||
 | 
						||
        Note that if the header appeared multiple times, exactly which
 | 
						||
        occurrence gets returned is undefined.  Use get_all() to get all
 | 
						||
        the values matching a header field name.
 | 
						||
        """
 | 
						||
        return self.get(name)
 | 
						||
 | 
						||
    def __setitem__(self, name, val):
 | 
						||
        """Set the value of a header.
 | 
						||
 | 
						||
        Note: this does not overwrite an existing header with the same field
 | 
						||
        name.  Use __delitem__() first to delete any existing headers.
 | 
						||
        """
 | 
						||
        self._headers.append((name, val))
 | 
						||
 | 
						||
    def __delitem__(self, name):
 | 
						||
        """Delete all occurrences of a header, if present.
 | 
						||
 | 
						||
        Does not raise an exception if the header is missing.
 | 
						||
        """
 | 
						||
        name = name.lower()
 | 
						||
        newheaders = []
 | 
						||
        for k, v in self._headers:
 | 
						||
            if k.lower() != name:
 | 
						||
                newheaders.append((k, v))
 | 
						||
        self._headers = newheaders
 | 
						||
 | 
						||
    def __contains__(self, name):
 | 
						||
        return name.lower() in [k.lower() for k, v in self._headers]
 | 
						||
 | 
						||
    def __iter__(self):
 | 
						||
        for field, value in self._headers:
 | 
						||
            yield field
 | 
						||
 | 
						||
    def keys(self):
 | 
						||
        """Return a list of all the message's header field names.
 | 
						||
 | 
						||
        These will be sorted in the order they appeared in the original
 | 
						||
        message, or were added to the message, and may contain duplicates.
 | 
						||
        Any fields deleted and re-inserted are always appended to the header
 | 
						||
        list.
 | 
						||
        """
 | 
						||
        return [k for k, v in self._headers]
 | 
						||
 | 
						||
    def values(self):
 | 
						||
        """Return a list of all the message's header values.
 | 
						||
 | 
						||
        These will be sorted in the order they appeared in the original
 | 
						||
        message, or were added to the message, and may contain duplicates.
 | 
						||
        Any fields deleted and re-inserted are always appended to the header
 | 
						||
        list.
 | 
						||
        """
 | 
						||
        return [_sanitize_header(k, v) for k, v in self._headers]
 | 
						||
 | 
						||
    def items(self):
 | 
						||
        """Get all the message's header fields and values.
 | 
						||
 | 
						||
        These will be sorted in the order they appeared in the original
 | 
						||
        message, or were added to the message, and may contain duplicates.
 | 
						||
        Any fields deleted and re-inserted are always appended to the header
 | 
						||
        list.
 | 
						||
        """
 | 
						||
        return [(k, _sanitize_header(k, v)) for k, v in self._headers]
 | 
						||
 | 
						||
    def get(self, name, failobj=None):
 | 
						||
        """Get a header value.
 | 
						||
 | 
						||
        Like __getitem__() but return failobj instead of None when the field
 | 
						||
        is missing.
 | 
						||
        """
 | 
						||
        name = name.lower()
 | 
						||
        for k, v in self._headers:
 | 
						||
            if k.lower() == name:
 | 
						||
                return _sanitize_header(k, v)
 | 
						||
        return failobj
 | 
						||
 | 
						||
    #
 | 
						||
    # Additional useful stuff
 | 
						||
    #
 | 
						||
 | 
						||
    def get_all(self, name, failobj=None):
 | 
						||
        """Return a list of all the values for the named field.
 | 
						||
 | 
						||
        These will be sorted in the order they appeared in the original
 | 
						||
        message, and may contain duplicates.  Any fields deleted and
 | 
						||
        re-inserted are always appended to the header list.
 | 
						||
 | 
						||
        If no such fields exist, failobj is returned (defaults to None).
 | 
						||
        """
 | 
						||
        values = []
 | 
						||
        name = name.lower()
 | 
						||
        for k, v in self._headers:
 | 
						||
            if k.lower() == name:
 | 
						||
                values.append(_sanitize_header(k, v))
 | 
						||
        if not values:
 | 
						||
            return failobj
 | 
						||
        return values
 | 
						||
 | 
						||
    def add_header(self, _name, _value, **_params):
 | 
						||
        """Extended header setting.
 | 
						||
 | 
						||
        name is the header field to add.  keyword arguments can be used to set
 | 
						||
        additional parameters for the header field, with underscores converted
 | 
						||
        to dashes.  Normally the parameter will be added as key="value" unless
 | 
						||
        value is None, in which case only the key will be added.  If a
 | 
						||
        parameter value contains non-ASCII characters it can be specified as a
 | 
						||
        three-tuple of (charset, language, value), in which case it will be
 | 
						||
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
 | 
						||
        the utf-8 charset and a language of ''.
 | 
						||
 | 
						||
        Examples:
 | 
						||
 | 
						||
        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
 | 
						||
        msg.add_header('content-disposition', 'attachment',
 | 
						||
                       filename=('utf-8', '', Fußballer.ppt'))
 | 
						||
        msg.add_header('content-disposition', 'attachment',
 | 
						||
                       filename='Fußballer.ppt'))
 | 
						||
        """
 | 
						||
        parts = []
 | 
						||
        for k, v in _params.items():
 | 
						||
            if v is None:
 | 
						||
                parts.append(k.replace('_', '-'))
 | 
						||
            else:
 | 
						||
                parts.append(_formatparam(k.replace('_', '-'), v))
 | 
						||
        if _value is not None:
 | 
						||
            parts.insert(0, _value)
 | 
						||
        self._headers.append((_name, SEMISPACE.join(parts)))
 | 
						||
 | 
						||
    def replace_header(self, _name, _value):
 | 
						||
        """Replace a header.
 | 
						||
 | 
						||
        Replace the first matching header found in the message, retaining
 | 
						||
        header order and case.  If no matching header was found, a KeyError is
 | 
						||
        raised.
 | 
						||
        """
 | 
						||
        _name = _name.lower()
 | 
						||
        for i, (k, v) in zip(range(len(self._headers)), self._headers):
 | 
						||
            if k.lower() == _name:
 | 
						||
                self._headers[i] = (k, _value)
 | 
						||
                break
 | 
						||
        else:
 | 
						||
            raise KeyError(_name)
 | 
						||
 | 
						||
    #
 | 
						||
    # Use these three methods instead of the three above.
 | 
						||
    #
 | 
						||
 | 
						||
    def get_content_type(self):
 | 
						||
        """Return the message's content type.
 | 
						||
 | 
						||
        The returned string is coerced to lower case of the form
 | 
						||
        `maintype/subtype'.  If there was no Content-Type header in the
 | 
						||
        message, the default type as given by get_default_type() will be
 | 
						||
        returned.  Since according to RFC 2045, messages always have a default
 | 
						||
        type this will always return a value.
 | 
						||
 | 
						||
        RFC 2045 defines a message's default type to be text/plain unless it
 | 
						||
        appears inside a multipart/digest container, in which case it would be
 | 
						||
        message/rfc822.
 | 
						||
        """
 | 
						||
        missing = object()
 | 
						||
        value = self.get('content-type', missing)
 | 
						||
        if value is missing:
 | 
						||
            # This should have no parameters
 | 
						||
            return self.get_default_type()
 | 
						||
        ctype = _splitparam(value)[0].lower()
 | 
						||
        # RFC 2045, section 5.2 says if its invalid, use text/plain
 | 
						||
        if ctype.count('/') != 1:
 | 
						||
            return 'text/plain'
 | 
						||
        return ctype
 | 
						||
 | 
						||
    def get_content_maintype(self):
 | 
						||
        """Return the message's main content type.
 | 
						||
 | 
						||
        This is the `maintype' part of the string returned by
 | 
						||
        get_content_type().
 | 
						||
        """
 | 
						||
        ctype = self.get_content_type()
 | 
						||
        return ctype.split('/')[0]
 | 
						||
 | 
						||
    def get_content_subtype(self):
 | 
						||
        """Returns the message's sub-content type.
 | 
						||
 | 
						||
        This is the `subtype' part of the string returned by
 | 
						||
        get_content_type().
 | 
						||
        """
 | 
						||
        ctype = self.get_content_type()
 | 
						||
        return ctype.split('/')[1]
 | 
						||
 | 
						||
    def get_default_type(self):
 | 
						||
        """Return the `default' content type.
 | 
						||
 | 
						||
        Most messages have a default content type of text/plain, except for
 | 
						||
        messages that are subparts of multipart/digest containers.  Such
 | 
						||
        subparts have a default content type of message/rfc822.
 | 
						||
        """
 | 
						||
        return self._default_type
 | 
						||
 | 
						||
    def set_default_type(self, ctype):
 | 
						||
        """Set the `default' content type.
 | 
						||
 | 
						||
        ctype should be either "text/plain" or "message/rfc822", although this
 | 
						||
        is not enforced.  The default content type is not stored in the
 | 
						||
        Content-Type header.
 | 
						||
        """
 | 
						||
        self._default_type = ctype
 | 
						||
 | 
						||
    def _get_params_preserve(self, failobj, header):
 | 
						||
        # Like get_params() but preserves the quoting of values.  BAW:
 | 
						||
        # should this be part of the public interface?
 | 
						||
        missing = object()
 | 
						||
        value = self.get(header, missing)
 | 
						||
        if value is missing:
 | 
						||
            return failobj
 | 
						||
        params = []
 | 
						||
        for p in _parseparam(value):
 | 
						||
            try:
 | 
						||
                name, val = p.split('=', 1)
 | 
						||
                name = name.strip()
 | 
						||
                val = val.strip()
 | 
						||
            except ValueError:
 | 
						||
                # Must have been a bare attribute
 | 
						||
                name = p.strip()
 | 
						||
                val = ''
 | 
						||
            params.append((name, val))
 | 
						||
        params = utils.decode_params(params)
 | 
						||
        return params
 | 
						||
 | 
						||
    def get_params(self, failobj=None, header='content-type', unquote=True):
 | 
						||
        """Return the message's Content-Type parameters, as a list.
 | 
						||
 | 
						||
        The elements of the returned list are 2-tuples of key/value pairs, as
 | 
						||
        split on the `=' sign.  The left hand side of the `=' is the key,
 | 
						||
        while the right hand side is the value.  If there is no `=' sign in
 | 
						||
        the parameter the value is the empty string.  The value is as
 | 
						||
        described in the get_param() method.
 | 
						||
 | 
						||
        Optional failobj is the object to return if there is no Content-Type
 | 
						||
        header.  Optional header is the header to search instead of
 | 
						||
        Content-Type.  If unquote is True, the value is unquoted.
 | 
						||
        """
 | 
						||
        missing = object()
 | 
						||
        params = self._get_params_preserve(missing, header)
 | 
						||
        if params is missing:
 | 
						||
            return failobj
 | 
						||
        if unquote:
 | 
						||
            return [(k, _unquotevalue(v)) for k, v in params]
 | 
						||
        else:
 | 
						||
            return params
 | 
						||
 | 
						||
    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Look up a single parameter of a header (Content-Type by default).

        failobj is returned when the header is absent from the message or
        when the header carries no parameter called param.  Use the optional
        header argument to search a header other than Content-Type.

        Parameter names are matched case insensitively.  The result is either
        a plain string or, for a parameter that was RFC 2231 encoded, a
        3-tuple of the form (CHARSET, LANGUAGE, VALUE).  CHARSET and LANGUAGE
        may both be None, in which case VALUE should be treated as us-ascii
        text; LANGUAGE can usually be ignored.  Unless unquote is False, the
        string (or the VALUE item of the 3-tuple) is returned unquoted.

        An application that does not care whether the parameter was RFC 2231
        encoded can collapse the result into a string like this:

            rawparam = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(rawparam)

        """
        if header not in self:
            return failobj
        wanted = param.lower()
        for name, raw_value in self._get_params_preserve(failobj, header):
            if name.lower() != wanted:
                continue
            # First match wins; later duplicates are never consulted.
            return _unquotevalue(raw_value) if unquote else raw_value
        return failobj
 | 
						||
 | 
						||
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.  A parameter that is present with an
        empty value (``foo`` or ``foo=""``) counts as existing and is
        replaced rather than duplicated.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)

        # Decide whether the parameter is already there by looking at the
        # parsed parameter names rather than at the truthiness of its value;
        # otherwise an existing-but-empty parameter would be appended a second
        # time.  The first item is the header's main value (e.g. text/plain),
        # which only counts when it was itself written as name=value.
        wanted = param.lower()
        missing = object()
        current = self._get_params_preserve(missing, header)
        if current is missing:
            current = []
        present = any(
            name.lower() == wanted and (pos > 0 or val)
            for pos, (name, val) in enumerate(current))

        if not present:
            # Append to the raw header text so existing quoting is untouched.
            new_param = _formatparam(param, value, requote)
            if not ctype:
                ctype = new_param
            else:
                ctype = SEMISPACE.join([ctype, new_param])
        else:
            # Rebuild the whole header, swapping in the new value.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == wanted:
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype
 | 
						||
 | 
						||
    def del_param(self, param, header='content-type', requote=True):
        """Drop one parameter from a header (Content-Type by default).

        The header is rebuilt from its remaining items, leaving out every
        item whose name matches param (compared case insensitively).  The
        surviving values are quoted as necessary unless requote is False.
        Pass header to operate on a header other than Content-Type.  Nothing
        happens when the header is not present in the message.
        """
        if header not in self:
            return
        target = param.lower()
        rebuilt = ''
        for name, val in self.get_params(header=header, unquote=requote):
            if name.lower() == target:
                continue
            piece = _formatparam(name, val, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
        # Only touch the header list when the text actually changed.
        if rebuilt != self.get(header):
            del self[header]
            self[header] = rebuilt
 | 
						||
 | 
						||
    def set_type(self, type, header='Content-Type', requote=True):
        """Replace the "maintype/subtype" value of the Content-Type header.

        type must contain exactly one '/' character; a ValueError is raised
        otherwise.

        Any parameters already on the header are carried over to the new
        header value.  They are re-quoted by default; pass requote=False to
        leave their existing quoting alone.

        A different header can be named with the header argument.  Whenever
        the header being set is Content-Type, the MIME-Version header is
        (re)set to 1.0 as well.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header in self:
            previous = self.get_params(header=header, unquote=requote)
            del self[header]
            self[header] = type
            # The first item is the old type itself, so start after it.
            for name, val in previous[1:]:
                self.set_param(name, val, header, requote)
        else:
            self[header] = type
 | 
						||
 | 
						||
    def get_filename(self, failobj=None):
        """Return the filename recorded for this part, or failobj.

        The `filename' parameter of the Content-Disposition header is tried
        first; when that is not available the `name' parameter of the
        Content-Type header is used instead.  The value is unquoted, RFC 2231
        encoding is collapsed, and surrounding whitespace is stripped.
        """
        sentinel = object()
        lookups = (('filename', 'content-disposition'),
                   ('name', 'content-type'))
        for key, hdr in lookups:
            found = self.get_param(key, sentinel, hdr)
            if found is not sentinel:
                return utils.collapse_rfc2231_value(found).strip()
        return failobj
 | 
						||
 | 
						||
    def get_boundary(self, failobj=None):
        """Return the multipart boundary string, or failobj if there is none.

        The value comes from the `boundary' parameter of the Content-Type
        header.  It is unquoted and any trailing whitespace is removed.
        """
        sentinel = object()
        raw = self.get_param('boundary', sentinel)
        if raw is sentinel:
            return failobj
        collapsed = utils.collapse_rfc2231_value(raw)
        # RFC 2046 says that boundaries may begin but not end in w/s
        return collapsed.rstrip()
 | 
						||
 | 
						||
    def set_boundary(self, boundary):
 | 
						||
        """Set the boundary parameter in Content-Type to 'boundary'.
 | 
						||
 | 
						||
        This is subtly different than deleting the Content-Type header and
 | 
						||
        adding a new one with a new boundary parameter via add_header().  The
 | 
						||
        main difference is that using the set_boundary() method preserves the
 | 
						||
        order of the Content-Type header in the original message.
 | 
						||
 | 
						||
        HeaderParseError is raised if the message has no Content-Type header.
 | 
						||
        """
 | 
						||
        missing = object()
 | 
						||
        params = self._get_params_preserve(missing, 'content-type')
 | 
						||
        if params is missing:
 | 
						||
            # There was no Content-Type header, and we don't know what type
 | 
						||
            # to set it to, so raise an exception.
 | 
						||
            raise errors.HeaderParseError('No Content-Type header found')
 | 
						||
        newparams = []
 | 
						||
        foundp = False
 | 
						||
        for pk, pv in params:
 | 
						||
            if pk.lower() == 'boundary':
 | 
						||
                newparams.append(('boundary', '"%s"' % boundary))
 | 
						||
                foundp = True
 | 
						||
            else:
 | 
						||
                newparams.append((pk, pv))
 | 
						||
        if not foundp:
 | 
						||
            # The original Content-Type header had no boundary attribute.
 | 
						||
            # Tack one on the end.  BAW: should we raise an exception
 | 
						||
            # instead???
 | 
						||
            newparams.append(('boundary', '"%s"' % boundary))
 | 
						||
        # Replace the existing Content-Type header with the new value
 | 
						||
        newheaders = []
 | 
						||
        for h, v in self._headers:
 | 
						||
            if h.lower() == 'content-type':
 | 
						||
                parts = []
 | 
						||
                for k, v in newparams:
 | 
						||
                    if v == '':
 | 
						||
                        parts.append(k)
 | 
						||
                    else:
 | 
						||
                        parts.append('%s=%s' % (k, v))
 | 
						||
                newheaders.append((h, SEMISPACE.join(parts)))
 | 
						||
 | 
						||
            else:
 | 
						||
                newheaders.append((h, v))
 | 
						||
        self._headers = newheaders
 | 
						||
 | 
						||
    def get_content_charset(self, failobj=None):
        """Return the Content-Type header's charset parameter, lower-cased.

        failobj is returned when the message has no Content-Type header,
        when that header has no charset parameter, or when the charset name
        contains characters outside the us-ascii range.
        """
        sentinel = object()
        value = self.get_param('charset', sentinel)
        if value is sentinel:
            return failobj
        if isinstance(value, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            codec = value[0] or 'us-ascii'
            text = value[2]
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                value = str(text.encode('raw-unicode-escape'), codec)
            except (LookupError, UnicodeError):
                value = text
        # charset characters must be in us-ascii range
        try:
            value.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return value.lower()
 | 
						||
 | 
						||
    def get_charsets(self, failobj=None):
        """Return the charset of this message and of every part inside it.

        The message tree is traversed with walk(), and each visited part
        contributes one item: the result of calling its
        get_content_charset() method with failobj (None by default) as the
        fallback value.

        Because walk() yields the container itself first, a message that is
        not multipart still produces a list of length 1.
        """
        return [
            subpart.get_content_charset(failobj)
            for subpart in self.walk()
        ]
 | 
						||
 | 
						||
    # I.e. def walk(self): ...
 | 
						||
    from email.iterators import walk
 |