mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			268 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			268 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| \section{\module{rfc822} ---
 | |
|          Parse RFC 822 mail headers}
 | |
| 
 | |
| \declaremodule{standard}{rfc822}
 | |
| \modulesynopsis{Parse \rfc{822} style mail headers.}
 | |
| 
 | |
| This module defines a class, \class{Message}, which represents a
 | |
| collection of ``email headers'' as defined by the Internet standard
 | |
| \rfc{822}.  It is used in various contexts, usually to read such
 | |
| headers from a file.  This module also defines a helper class 
 | |
| \class{AddressList} for parsing \rfc{822} addresses.  Please refer to
 | |
| the RFC for information on the specific syntax of \rfc{822} headers.
 | |
| 
 | |
| The \refmodule{mailbox}\refstmodindex{mailbox} module provides classes 
 | |
| to read mailboxes produced by various end-user mail programs.
 | |
| 
 | |
| \begin{classdesc}{Message}{file\optional{, seekable}}
 | |
| A \class{Message} instance is instantiated with an input object as
 | |
| parameter.  \class{Message} relies only on the input object having a
 | |
| \method{readline()} method; in particular, ordinary file objects
 | |
| qualify.  Instantiation reads headers from the input object up to a
 | |
| delimiter line (normally a blank line) and stores them in the
 | |
| instance.
 | |
| 
 | |
| This class can work with any input object that supports a
 | |
| \method{readline()} method.  If the input object has seek and tell
 | |
| capability, the \method{rewindbody()} method will work; also, illegal
 | |
| lines will be pushed back onto the input stream.  If the input object
 | |
| lacks seek but has an \method{unread()} method that can push back a
 | |
| line of input, \class{Message} will use that to push back illegal
 | |
| lines.  Thus this class can be used to parse messages coming from a
 | |
| buffered stream.
 | |
| 
 | |
| The optional \var{seekable} argument is provided as a workaround for
 | |
| certain stdio libraries in which \cfunction{tell()} discards buffered
 | |
| data before discovering that the \cfunction{lseek()} system call
 | |
| doesn't work.  For maximum portability, you should set the \var{seekable}
 | |
| argument to zero to prevent that initial \method{tell()} when passing
 | |
| in an unseekable object such as a file object created from a socket
 | |
| object.
 | |
| 
 | |
| Input lines as read from the file may either be terminated by CR-LF or
 | |
| by a single linefeed; a terminating CR-LF is replaced by a single
 | |
| linefeed before the line is stored.
 | |
| 
 | |
| All header matching is done independent of upper or lower case;
 | |
| e.g.\ \code{\var{m}['From']}, \code{\var{m}['from']} and
 | |
| \code{\var{m}['FROM']} all yield the same result.
 | |
| \end{classdesc}
 | |
| 
 | |
| \begin{classdesc}{AddressList}{field}
 | |
| You may instantiate the \class{AddressList} helper class using a single
 | |
| string parameter, a comma-separated list of \rfc{822} addresses to be
 | |
| parsed.  (The parameter \code{None} yields an empty list.)
 | |
| \end{classdesc}
 | |
| 
 | |
| \begin{funcdesc}{parsedate}{date}
 | |
| Attempts to parse a date according to the rules in \rfc{822}.
 | |
| However, some mailers don't follow that format as specified, so
 | |
| \function{parsedate()} tries to guess correctly in such cases. 
 | |
| \var{date} is a string containing an \rfc{822} date, such as 
 | |
| \code{'Mon, 20 Nov 1995 19:12:08 -0500'}.  If it succeeds in parsing
 | |
| the date, \function{parsedate()} returns a 9-tuple that can be passed
 | |
| directly to \function{time.mktime()}; otherwise \code{None} will be
 | |
| returned.  Note that fields 6, 7, and 8 of the result tuple are not
 | |
| usable.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{parsedate_tz}{date}
 | |
| Performs the same function as \function{parsedate()}, but returns
 | |
| either \code{None} or a 10-tuple; the first 9 elements make up a tuple
 | |
| that can be passed directly to \function{time.mktime()}, and the tenth
 | |
| is the offset of the date's timezone from UTC (which is the official
 | |
| term for Greenwich Mean Time).  (Note that the sign of the timezone
 | |
| offset is the opposite of the sign of the \code{time.timezone}
 | |
| variable for the same timezone; the latter variable follows the
 | |
| \POSIX{} standard while this module follows \rfc{822}.)  If the input
 | |
| string has no timezone, the last element of the tuple returned is
 | |
| \code{None}.  Note that fields 6, 7, and 8 of the result tuple are not
 | |
| usable.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{mktime_tz}{tuple}
 | |
| Turn a 10-tuple as returned by \function{parsedate_tz()} into a UTC
 | |
| timestamp.  If the timezone item in the tuple is \code{None}, assume
 | |
| local time.  Minor deficiency: this first interprets the first 8
 | |
| elements as a local time and then compensates for the timezone
 | |
| difference; this may yield a slight error around daylight savings time
 | |
| switch dates.  Not enough to worry about for common use.
 | |
| \end{funcdesc}
 | |
| 
 | |
| 
 | |
| \begin{seealso}
 | |
|   \seemodule{mailbox}{Classes to read various mailbox formats produced 
 | |
|                       by end-user mail programs.}
 | |
|   \seemodule{mimetools}{Subclass of \class{rfc822.Message} that handles MIME encoded
 | |
| 		        messages.} 
 | |
| \end{seealso}
 | |
| 
 | |
| 
 | |
| \subsection{Message Objects \label{message-objects}}
 | |
| 
 | |
| A \class{Message} instance has the following methods:
 | |
| 
 | |
| \begin{methoddesc}{rewindbody}{}
 | |
| Seek to the start of the message body.  This only works if the file
 | |
| object is seekable.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{isheader}{line}
 | |
| Returns a line's canonicalized fieldname (the dictionary key that will
 | |
| be used to index it) if the line is a legal \rfc{822} header; otherwise
 | |
| returns \code{None} (implying that parsing should stop here and the line be
 | |
| pushed back on the input stream).  It is sometimes useful to override
 | |
| this method in a subclass.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{islast}{line}
 | |
| Return true if the given line is a delimiter on which \class{Message} should
 | |
| stop.  The delimiter line is consumed, and the file object's read
 | |
| location positioned immediately after it.  By default this method just
 | |
| checks that the line is blank, but you can override it in a subclass.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{iscomment}{line}
 | |
| Return true if the given line should be ignored entirely, just skipped.
 | |
| By default this is a stub that always returns false, but you can
 | |
| override it in a subclass.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getallmatchingheaders}{name}
 | |
| Return a list of lines consisting of all headers matching
 | |
| \var{name}, if any.  Each physical line, whether it is a continuation
 | |
| line or not, is a separate list item.  Return the empty list if no
 | |
| header matches \var{name}.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getfirstmatchingheader}{name}
 | |
| Return a list of lines comprising the first header matching
 | |
| \var{name}, and its continuation line(s), if any.  Return
 | |
| \code{None} if there is no header matching \var{name}.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getrawheader}{name}
 | |
| Return a single string consisting of the text after the colon in the
 | |
| first header matching \var{name}.  This includes leading whitespace,
 | |
| the trailing linefeed, and internal linefeeds and whitespace if
 | |
| any continuation line(s) were present.  Return \code{None} if there is
 | |
| no header matching \var{name}.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getheader}{name\optional{, default}}
 | |
| Like \code{getrawheader(\var{name})}, but strip leading and trailing
 | |
| whitespace.  Internal whitespace is not stripped.  The optional
 | |
| \var{default} argument can be used to specify a different default to
 | |
| be returned when there is no header matching \var{name}.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{get}{name\optional{, default}}
 | |
| An alias for \method{getheader()}, to make the interface more compatible 
 | |
| with regular dictionaries.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getaddr}{name}
 | |
| Return a pair \code{(\var{full name}, \var{email address})} parsed
 | |
| from the string returned by \code{getheader(\var{name})}.  If no
 | |
| header matching \var{name} exists, return \code{(None, None)};
 | |
| otherwise both the full name and the address are (possibly empty)
 | |
| strings.
 | |
| 
 | |
| Example: If \var{m}'s first \code{From} header contains the string
 | |
| \code{'jack@cwi.nl (Jack Jansen)'}, then
 | |
| \code{m.getaddr('From')} will yield the pair
 | |
| \code{('Jack Jansen', 'jack@cwi.nl')}.
 | |
| If the header contained
 | |
| \code{'Jack Jansen <jack@cwi.nl>'} instead, it would yield the
 | |
| exact same result.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getaddrlist}{name}
 | |
| This is similar to \code{getaddr(\var{name})}, but parses a header
 | |
| containing a list of email addresses (e.g.\ a \code{To} header) and
 | |
| returns a list of \code{(\var{full name}, \var{email address})} pairs
 | |
| (even if there was only one address in the header).  If there is no
 | |
| header matching \var{name}, return an empty list.
 | |
| 
 | |
| If multiple headers exist that match the named header (e.g.\ if there
 | |
| are several \code{Cc} headers), all are parsed for addresses.  Any
 | |
| continuation lines the named headers contain are also parsed.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getdate}{name}
 | |
| Retrieve a header using \method{getheader()} and parse it into a 9-tuple
 | |
| compatible with \function{time.mktime()}; note that fields 6, 7, and 8 
 | |
| are not usable.  If there is no header matching
 | |
| \var{name}, or it is unparsable, return \code{None}.
 | |
| 
 | |
| Date parsing appears to be a black art, and not all mailers adhere to
 | |
| the standard.  While it has been tested and found correct on a large
 | |
| collection of email from many sources, it is still possible that this
 | |
| function may occasionally yield an incorrect result.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{getdate_tz}{name}
 | |
| Retrieve a header using \method{getheader()} and parse it into a
 | |
| 10-tuple; the first 9 elements will make a tuple compatible with
 | |
| \function{time.mktime()}, and the 10th is a number giving the offset
 | |
| of the date's timezone from UTC.  Note that fields 6, 7, and 8 
 | |
| are not usable.  Similarly to \method{getdate()}, if
 | |
| there is no header matching \var{name}, or it is unparsable, return
 | |
| \code{None}. 
 | |
| \end{methoddesc}
 | |
| 
 | |
| \class{Message} instances also support a read-only mapping interface.
 | |
| In particular: \code{\var{m}[name]} is like
 | |
| \code{\var{m}.getheader(name)} but raises \exception{KeyError} if
 | |
| there is no matching header; and \code{len(\var{m})},
 | |
| \code{\var{m}.has_key(name)}, \code{\var{m}.keys()},
 | |
| \code{\var{m}.values()} and \code{\var{m}.items()} act as expected
 | |
| (and consistently).
 | |
| 
 | |
| Finally, \class{Message} instances have two public instance variables:
 | |
| 
 | |
| \begin{memberdesc}{headers}
 | |
| A list containing the entire set of header lines, in the order in
 | |
| which they were read (except that setitem calls may disturb this
 | |
| order). Each line contains a trailing newline.  The
 | |
| blank line terminating the headers is not contained in the list.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{fp}
 | |
| The file or file-like object passed at instantiation time.  This can
 | |
| be used to read the message content.
 | |
| \end{memberdesc}
 | |
| 
 | |
| 
 | |
| \subsection{AddressList Objects \label{addresslist-objects}}
 | |
| 
 | |
| An \class{AddressList} instance has the following methods:
 | |
| 
 | |
| \begin{methoddesc}{__len__}{}
 | |
| Return the number of addresses in the address list.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{__str__}{}
 | |
| Return a canonicalized string representation of the address list.
 | |
| Addresses are rendered in \code{"name" <host@domain>} form, comma-separated.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{__add__}{alist}
 | |
| Return an \class{AddressList} instance that contains all addresses in
 | |
| both \class{AddressList} operands, with duplicates removed (set union).
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{__sub__}{alist}
 | |
| Return an \class{AddressList} instance that contains every address in the
 | |
| left-hand \class{AddressList} operand that is not present in the right-hand
 | |
| address operand (set difference).
 | |
| \end{methoddesc}
 | |
| 
 | |
| 
 | |
| Finally, \class{AddressList} instances have one public instance variable:
 | |
| 
 | |
| \begin{memberdesc}{addresslist}
 | |
| A list of string pairs (2-tuples), one per address.  In each member, the
 | |
| first is the canonicalized name part of the address, the second is the
 | |
| route-address (@-separated host-domain pair).
 | |
| \end{memberdesc}
 | 
