| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | # Copyright (C) 2002-2007 Python Software Foundation | 
					
						
							|  |  |  |  | # Author: Ben Gertzfield, Barry Warsaw | 
					
						
							|  |  |  |  | # Contact: email-sig@python.org | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | """Header encoding and decoding functionality.""" | 
					
						
							|  |  |  |  | 
 | 
					
						
# Names exported by a star-import of this module.
__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import re | 
					
						
							|  |  |  |  | import binascii | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import email.quoprimime | 
					
						
							|  |  |  |  | import email.base64mime | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | from email.errors import HeaderParseError | 
					
						
							| 
									
										
										
										
											2011-01-07 23:25:30 +00:00
										 |  |  |  | from email import charset as _charset | 
					
						
# Module-level alias for the Charset class of the email charset module.
Charset = _charset.Charset

# Small string/bytes constants used by the code in this module.
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
# Default maximum line length used by Header when maxlinelen is not given.
MAXLINELEN = 78
# Space and tab (presumably the "folding whitespace" characters -- the name
# is the only hint visible here).
FWS = ' \t'

# Ready-made Charset instances: USASCII is the default charset of a Header,
# UTF8 is the fallback used by Header.append() when text does not fit ASCII.
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal
# 041) to tilde (octal 176).
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.  Matches a newline followed by a run of
# non-space, non-tab characters and a colon.
_embeded_header = re.compile(r'\n[^ \t]+:')


# Helpers
_max_append = email.quoprimime._max_append
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |  | 
					
						
							|  |  |  |  | def decode_header(header): | 
					
						
							|  |  |  |  |     """Decode a message header value without converting charset.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     Returns a list of (string, charset) pairs containing each of the decoded | 
					
						
							|  |  |  |  |     parts of the header.  Charset is None for non-encoded parts of the header, | 
					
						
							|  |  |  |  |     otherwise a lower-case string containing the name of the character set | 
					
						
							|  |  |  |  |     specified in the encoded string. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-03-25 15:10:55 -04:00
										 |  |  |  |     header may be a string that may or may not contain RFC2047 encoded words, | 
					
						
							|  |  |  |  |     or it may be a Header object. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-07-12 16:43:19 +00:00
										 |  |  |  |     An email.errors.HeaderParseError may be raised when certain decoding error | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |     occurs (e.g. a base64 decoding exception). | 
					
						
							|  |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2011-06-18 12:30:55 -04:00
										 |  |  |  |     # If it is a Header object, we can just return the encoded chunks. | 
					
						
							| 
									
										
										
										
											2011-03-25 15:10:55 -04:00
										 |  |  |  |     if hasattr(header, '_chunks'): | 
					
						
							| 
									
										
										
										
											2011-06-18 12:30:55 -04:00
										 |  |  |  |         return [(_charset._encode(string, str(charset)), str(charset)) | 
					
						
							|  |  |  |  |                     for string, charset in header._chunks] | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |     # If no encoding, just return the header with no charset. | 
					
						
							|  |  |  |  |     if not ecre.search(header): | 
					
						
							|  |  |  |  |         return [(header, None)] | 
					
						
							|  |  |  |  |     # First step is to parse all the encoded parts into triplets of the form | 
					
						
							|  |  |  |  |     # (encoded_string, encoding, charset).  For unencoded strings, the last | 
					
						
							|  |  |  |  |     # two parts will be None. | 
					
						
							|  |  |  |  |     words = [] | 
					
						
							|  |  |  |  |     for line in header.splitlines(): | 
					
						
							|  |  |  |  |         parts = ecre.split(line) | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |         first = True | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         while parts: | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |             unencoded = parts.pop(0) | 
					
						
							|  |  |  |  |             if first: | 
					
						
							|  |  |  |  |                 unencoded = unencoded.lstrip() | 
					
						
							|  |  |  |  |                 first = False | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             if unencoded: | 
					
						
							|  |  |  |  |                 words.append((unencoded, None, None)) | 
					
						
							|  |  |  |  |             if parts: | 
					
						
							|  |  |  |  |                 charset = parts.pop(0).lower() | 
					
						
							|  |  |  |  |                 encoding = parts.pop(0).lower() | 
					
						
							|  |  |  |  |                 encoded = parts.pop(0) | 
					
						
							|  |  |  |  |                 words.append((encoded, encoding, charset)) | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |     # Now loop over words and remove words that consist of whitespace | 
					
						
							|  |  |  |  |     # between two encoded strings. | 
					
						
							|  |  |  |  |     droplist = [] | 
					
						
							|  |  |  |  |     for n, w in enumerate(words): | 
					
						
							|  |  |  |  |         if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace(): | 
					
						
							|  |  |  |  |             droplist.append(n-1) | 
					
						
							|  |  |  |  |     for d in reversed(droplist): | 
					
						
							|  |  |  |  |         del words[d] | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |     # The next step is to decode each encoded word by applying the reverse | 
					
						
							|  |  |  |  |     # base64 or quopri transformation.  decoded_words is now a list of the | 
					
						
							|  |  |  |  |     # form (decoded_word, charset). | 
					
						
							|  |  |  |  |     decoded_words = [] | 
					
						
							|  |  |  |  |     for encoded_string, encoding, charset in words: | 
					
						
							|  |  |  |  |         if encoding is None: | 
					
						
							|  |  |  |  |             # This is an unencoded word. | 
					
						
							|  |  |  |  |             decoded_words.append((encoded_string, charset)) | 
					
						
							|  |  |  |  |         elif encoding == 'q': | 
					
						
							|  |  |  |  |             word = email.quoprimime.header_decode(encoded_string) | 
					
						
							|  |  |  |  |             decoded_words.append((word, charset)) | 
					
						
							|  |  |  |  |         elif encoding == 'b': | 
					
						
							| 
									
										
										
										
											2010-08-03 22:14:10 +00:00
										 |  |  |  |             paderr = len(encoded_string) % 4   # Postel's law: add missing padding | 
					
						
							|  |  |  |  |             if paderr: | 
					
						
							|  |  |  |  |                 encoded_string += '==='[:4 - paderr] | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             try: | 
					
						
							|  |  |  |  |                 word = email.base64mime.decode(encoded_string) | 
					
						
							|  |  |  |  |             except binascii.Error: | 
					
						
							|  |  |  |  |                 raise HeaderParseError('Base64 decoding error') | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 decoded_words.append((word, charset)) | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             raise AssertionError('Unexpected encoding: ' + encoding) | 
					
						
							|  |  |  |  |     # Now convert all words to bytes and collapse consecutive runs of | 
					
						
							|  |  |  |  |     # similarly encoded words. | 
					
						
							|  |  |  |  |     collapsed = [] | 
					
						
							|  |  |  |  |     last_word = last_charset = None | 
					
						
							|  |  |  |  |     for word, charset in decoded_words: | 
					
						
							|  |  |  |  |         if isinstance(word, str): | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |             word = bytes(word, 'raw-unicode-escape') | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         if last_word is None: | 
					
						
							|  |  |  |  |             last_word = word | 
					
						
							|  |  |  |  |             last_charset = charset | 
					
						
							|  |  |  |  |         elif charset != last_charset: | 
					
						
							|  |  |  |  |             collapsed.append((last_word, last_charset)) | 
					
						
							|  |  |  |  |             last_word = word | 
					
						
							|  |  |  |  |             last_charset = charset | 
					
						
							|  |  |  |  |         elif last_charset is None: | 
					
						
							|  |  |  |  |             last_word += BSPACE + word | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             last_word += word | 
					
						
							|  |  |  |  |     collapsed.append((last_word, last_charset)) | 
					
						
							|  |  |  |  |     return collapsed | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |  | 
					
						
							|  |  |  |  | def make_header(decoded_seq, maxlinelen=None, header_name=None, | 
					
						
							|  |  |  |  |                 continuation_ws=' '): | 
					
						
							|  |  |  |  |     """Create a Header from a sequence of pairs as returned by decode_header()
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     decode_header() takes a header value string and returns a sequence of | 
					
						
							|  |  |  |  |     pairs of the format (decoded_string, charset) where charset is the string | 
					
						
							|  |  |  |  |     name of the character set. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     This function takes one of those sequence of pairs and returns a Header | 
					
						
							|  |  |  |  |     instance.  Optional maxlinelen, header_name, and continuation_ws are as in | 
					
						
							|  |  |  |  |     the Header constructor. | 
					
						
							|  |  |  |  |     """
 | 
					
						
							|  |  |  |  |     h = Header(maxlinelen=maxlinelen, header_name=header_name, | 
					
						
							|  |  |  |  |                continuation_ws=continuation_ws) | 
					
						
							|  |  |  |  |     for s, charset in decoded_seq: | 
					
						
							|  |  |  |  |         # None means us-ascii but we can simply pass it on to h.append() | 
					
						
							|  |  |  |  |         if charset is not None and not isinstance(charset, Charset): | 
					
						
							|  |  |  |  |             charset = Charset(charset) | 
					
						
							|  |  |  |  |         h.append(s, charset) | 
					
						
							|  |  |  |  |     return h | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |  | 
					
						
							|  |  |  |  | class Header: | 
					
						
							|  |  |  |  |     def __init__(self, s=None, charset=None, | 
					
						
							|  |  |  |  |                  maxlinelen=None, header_name=None, | 
					
						
							|  |  |  |  |                  continuation_ws=' ', errors='strict'): | 
					
						
							|  |  |  |  |         """Create a MIME-compliant header that can contain many character sets.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         Optional s is the initial header value.  If None, the initial header | 
					
						
							|  |  |  |  |         value is not set.  You can later append to the header with .append() | 
					
						
							|  |  |  |  |         method calls.  s may be a byte string or a Unicode string, but see the | 
					
						
							|  |  |  |  |         .append() documentation for semantics. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         Optional charset serves two purposes: it has the same meaning as the | 
					
						
							|  |  |  |  |         charset argument to the .append() method.  It also sets the default | 
					
						
							|  |  |  |  |         character set for all subsequent .append() calls that omit the charset | 
					
						
							|  |  |  |  |         argument.  If charset is not provided in the constructor, the us-ascii | 
					
						
							|  |  |  |  |         charset is used both as s's initial charset and as the default for | 
					
						
							|  |  |  |  |         subsequent .append() calls. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-12-29 16:57:24 +00:00
										 |  |  |  |         The maximum line length can be specified explicitly via maxlinelen. For | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         splitting the first line to a shorter value (to account for the field | 
					
						
							|  |  |  |  |         header which isn't included in s, e.g. `Subject') pass in the name of | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         the field in header_name.  The default maxlinelen is 78 as recommended | 
					
						
							|  |  |  |  |         by RFC 2822. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         continuation_ws must be RFC 2822 compliant folding whitespace (usually | 
					
						
							|  |  |  |  |         either a space or a hard tab) which will be prepended to continuation | 
					
						
							|  |  |  |  |         lines. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         errors is passed through to the .append() call. | 
					
						
							|  |  |  |  |         """
 | 
					
						
							|  |  |  |  |         if charset is None: | 
					
						
							|  |  |  |  |             charset = USASCII | 
					
						
							|  |  |  |  |         elif not isinstance(charset, Charset): | 
					
						
							|  |  |  |  |             charset = Charset(charset) | 
					
						
							|  |  |  |  |         self._charset = charset | 
					
						
							|  |  |  |  |         self._continuation_ws = continuation_ws | 
					
						
							|  |  |  |  |         self._chunks = [] | 
					
						
							|  |  |  |  |         if s is not None: | 
					
						
							|  |  |  |  |             self.append(s, charset, errors) | 
					
						
							|  |  |  |  |         if maxlinelen is None: | 
					
						
							|  |  |  |  |             maxlinelen = MAXLINELEN | 
					
						
							|  |  |  |  |         self._maxlinelen = maxlinelen | 
					
						
							|  |  |  |  |         if header_name is None: | 
					
						
							|  |  |  |  |             self._headerlen = 0 | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             # Take the separating colon and space into account. | 
					
						
							|  |  |  |  |             self._headerlen = len(header_name) + 2 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def __str__(self): | 
					
						
							|  |  |  |  |         """Return the string value of the header.""" | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         self._normalize() | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         uchunks = [] | 
					
						
							|  |  |  |  |         lastcs = None | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |         lastspace = None | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         for string, charset in self._chunks: | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             # We must preserve spaces between encoded and non-encoded word | 
					
						
							|  |  |  |  |             # boundaries, which means for us we need to add a space when we go | 
					
						
							|  |  |  |  |             # from a charset to None/us-ascii, or from None/us-ascii to a | 
					
						
							|  |  |  |  |             # charset.  Only do this for the second and subsequent chunks. | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |             # Don't add a space if the None/us-ascii string already has | 
					
						
							|  |  |  |  |             # a space (trailing or leading depending on transition) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             nextcs = charset | 
					
						
							| 
									
										
										
										
											2011-01-07 23:25:30 +00:00
										 |  |  |  |             if nextcs == _charset.UNKNOWN8BIT: | 
					
						
							|  |  |  |  |                 original_bytes = string.encode('ascii', 'surrogateescape') | 
					
						
							|  |  |  |  |                 string = original_bytes.decode('ascii', 'replace') | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             if uchunks: | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |                 hasspace = string and self._nonctext(string[0]) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |                 if lastcs not in (None, 'us-ascii'): | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |                     if nextcs in (None, 'us-ascii') and not hasspace: | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |                         uchunks.append(SPACE) | 
					
						
							|  |  |  |  |                         nextcs = None | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |                 elif nextcs not in (None, 'us-ascii') and not lastspace: | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |                     uchunks.append(SPACE) | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |             lastspace = string and self._nonctext(string[-1]) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             lastcs = nextcs | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |             uchunks.append(string) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         return EMPTYSTRING.join(uchunks) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # Rich comparison operators for equality only.  BAW: does it make sense to | 
					
						
							|  |  |  |  |     # have or explicitly disable <, <=, >, >= operators? | 
					
						
							|  |  |  |  |     def __eq__(self, other): | 
					
						
							|  |  |  |  |         # other may be a Header or a string.  Both are fine so coerce | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         # ourselves to a unicode (of the unencoded header value), swap the | 
					
						
							|  |  |  |  |         # args and do another comparison. | 
					
						
							|  |  |  |  |         return other == str(self) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def __ne__(self, other): | 
					
						
							|  |  |  |  |         return not self == other | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def append(self, s, charset=None, errors='strict'): | 
					
						
							|  |  |  |  |         """Append a string to the MIME header.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         Optional charset, if given, should be a Charset instance or the name | 
					
						
							|  |  |  |  |         of a character set (which will be converted to a Charset instance).  A | 
					
						
							|  |  |  |  |         value of None (the default) means that the charset given in the | 
					
						
							|  |  |  |  |         constructor is used. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         s may be a byte string or a Unicode string.  If it is a byte string | 
					
						
							| 
									
										
										
										
											2010-12-29 16:57:24 +00:00
										 |  |  |  |         (i.e. isinstance(s, str) is false), then charset is the encoding of | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         that byte string, and a UnicodeError will be raised if the string | 
					
						
							|  |  |  |  |         cannot be decoded with that charset.  If s is a Unicode string, then | 
					
						
							|  |  |  |  |         charset is a hint specifying the character set of the characters in | 
					
						
							| 
									
										
										
										
											2011-01-05 01:39:32 +00:00
										 |  |  |  |         the string.  In either case, when producing an RFC 2822 compliant | 
					
						
							|  |  |  |  |         header using RFC 2047 rules, the string will be encoded using the | 
					
						
							|  |  |  |  |         output codec of the charset.  If the string cannot be encoded to the | 
					
						
							|  |  |  |  |         output codec, a UnicodeError will be raised. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-01-05 01:39:32 +00:00
										 |  |  |  |         Optional `errors' is passed as the errors argument to the decode | 
					
						
							|  |  |  |  |         call if s is a byte string. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         """
 | 
					
						
							|  |  |  |  |         if charset is None: | 
					
						
							|  |  |  |  |             charset = self._charset | 
					
						
							|  |  |  |  |         elif not isinstance(charset, Charset): | 
					
						
							|  |  |  |  |             charset = Charset(charset) | 
					
						
							| 
									
										
										
										
											2011-01-05 01:39:32 +00:00
										 |  |  |  |         if not isinstance(s, str): | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             input_charset = charset.input_codec or 'us-ascii' | 
					
						
							| 
									
										
										
										
											2011-06-18 12:57:28 -04:00
										 |  |  |  |             if input_charset == _charset.UNKNOWN8BIT: | 
					
						
							|  |  |  |  |                 s = s.decode('us-ascii', 'surrogateescape') | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 s = s.decode(input_charset, errors) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         # Ensure that the bytes we're storing can be decoded to the output | 
					
						
							| 
									
										
										
										
											2012-12-18 21:14:22 +02:00
										 |  |  |  |         # character set, otherwise an early error is raised. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         output_charset = charset.output_codec or 'us-ascii' | 
					
						
							| 
									
										
										
										
											2011-01-07 23:25:30 +00:00
										 |  |  |  |         if output_charset != _charset.UNKNOWN8BIT: | 
					
						
							| 
									
										
										
										
											2012-03-14 02:59:51 -04:00
										 |  |  |  |             try: | 
					
						
							|  |  |  |  |                 s.encode(output_charset, errors) | 
					
						
							|  |  |  |  |             except UnicodeEncodeError: | 
					
						
							|  |  |  |  |                 if output_charset!='us-ascii': | 
					
						
							|  |  |  |  |                     raise | 
					
						
							|  |  |  |  |                 charset = UTF8 | 
					
						
							| 
									
										
										
										
											2011-01-05 01:39:32 +00:00
										 |  |  |  |         self._chunks.append((s, charset)) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |     def _nonctext(self, s): | 
					
						
							|  |  |  |  |         """True if string s is not a ctext character of RFC822.
 | 
					
						
							|  |  |  |  |         """
 | 
					
						
							|  |  |  |  |         return s.isspace() or s in ('(', ')', '\\') | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-23 22:19:56 +00:00
										 |  |  |  |     def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): | 
					
						
							| 
									
										
										
										
											2011-03-14 18:35:56 -04:00
										 |  |  |  |         r"""Encode a message header into an RFC-compliant format.
 | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         There are many issues involved in converting a given string for use in | 
					
						
							|  |  |  |  |         an email header.  Only certain character sets are readable in most | 
					
						
							|  |  |  |  |         email clients, and as header strings can only contain a subset of | 
					
						
							|  |  |  |  |         7-bit ASCII, care must be taken to properly convert and encode (with | 
					
						
							|  |  |  |  |         Base64 or quoted-printable) header strings.  In addition, there is a | 
					
						
							|  |  |  |  |         75-character length limit on any given encoded header field, so | 
					
						
							|  |  |  |  |         line-wrapping must be performed, even with double-byte character sets. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-13 16:43:21 +03:00
										 |  |  |  |         Optional maxlinelen specifies the maximum length of each generated | 
					
						
							| 
									
										
										
										
											2011-04-12 15:00:44 -04:00
										 |  |  |  |         line, exclusive of the linesep string.  Individual lines may be longer | 
					
						
							|  |  |  |  |         than maxlinelen if a folding point cannot be found.  The first line | 
					
						
							|  |  |  |  |         will be shorter by the length of the header name plus ": " if a header | 
					
						
							|  |  |  |  |         name was specified at Header construction time.  The default value for | 
					
						
							|  |  |  |  |         maxlinelen is determined at header construction time. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         Optional splitchars is a string containing characters which should be | 
					
						
							|  |  |  |  |         given extra weight by the splitting algorithm during normal header | 
					
						
							|  |  |  |  |         wrapping.  This is in very rough support of RFC 2822's `higher level | 
					
						
							|  |  |  |  |         syntactic breaks':  split points preceded by a splitchar are preferred | 
					
						
							|  |  |  |  |         during line splitting, with the characters preferred in the order in | 
					
						
							|  |  |  |  |         which they appear in the string.  Space and tab may be included in the | 
					
						
							|  |  |  |  |         string to indicate whether preference should be given to one over the | 
					
						
							|  |  |  |  |         other as a split point when other split chars do not appear in the line | 
					
						
							|  |  |  |  |         being split.  Splitchars does not affect RFC 2047 encoded lines. | 
					
						
							| 
									
										
										
										
											2010-10-23 22:19:56 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         Optional linesep is a string to be used to separate the lines of | 
					
						
							|  |  |  |  |         the value.  The default value is the most useful for typical | 
					
						
							|  |  |  |  |         Python applications, but it can be set to \r\n to produce RFC-compliant | 
					
						
							|  |  |  |  |         line separators when needed. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         """
 | 
					
						
							|  |  |  |  |         self._normalize() | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         if maxlinelen is None: | 
					
						
							|  |  |  |  |             maxlinelen = self._maxlinelen | 
					
						
							|  |  |  |  |         # A maxlinelen of 0 means don't wrap.  For all practical purposes, | 
					
						
							|  |  |  |  |         # choosing a huge number here accomplishes that and makes the | 
					
						
							|  |  |  |  |         # _ValueFormatter algorithm much simpler. | 
					
						
							|  |  |  |  |         if maxlinelen == 0: | 
					
						
							|  |  |  |  |             maxlinelen = 1000000 | 
					
						
							|  |  |  |  |         formatter = _ValueFormatter(self._headerlen, maxlinelen, | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |                                     self._continuation_ws, splitchars) | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |         lastcs = None | 
					
						
							|  |  |  |  |         hasspace = lastspace = None | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         for string, charset in self._chunks: | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |             if hasspace is not None: | 
					
						
							|  |  |  |  |                 hasspace = string and self._nonctext(string[0]) | 
					
						
							|  |  |  |  |                 if lastcs not in (None, 'us-ascii'): | 
					
						
							|  |  |  |  |                     if not hasspace or charset not in (None, 'us-ascii'): | 
					
						
							|  |  |  |  |                         formatter.add_transition() | 
					
						
							|  |  |  |  |                 elif charset not in (None, 'us-ascii') and not lastspace: | 
					
						
							|  |  |  |  |                     formatter.add_transition() | 
					
						
							|  |  |  |  |             lastspace = string and self._nonctext(string[-1]) | 
					
						
							|  |  |  |  |             lastcs = charset | 
					
						
							|  |  |  |  |             hasspace = False | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             lines = string.splitlines() | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |             if lines: | 
					
						
							|  |  |  |  |                 formatter.feed('', lines[0], charset) | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 formatter.feed('', '', charset) | 
					
						
							| 
									
										
										
										
											2011-01-07 21:57:25 +00:00
										 |  |  |  |             for line in lines[1:]: | 
					
						
							|  |  |  |  |                 formatter.newline() | 
					
						
							|  |  |  |  |                 if charset.header_encoding is not None: | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |                     formatter.feed(self._continuation_ws, ' ' + line.lstrip(), | 
					
						
							|  |  |  |  |                                    charset) | 
					
						
							|  |  |  |  |                 else: | 
					
						
							|  |  |  |  |                     sline = line.lstrip() | 
					
						
							|  |  |  |  |                     fws = line[:len(line)-len(sline)] | 
					
						
							|  |  |  |  |                     formatter.feed(fws, sline, charset) | 
					
						
							| 
									
										
										
										
											2011-01-07 21:57:25 +00:00
										 |  |  |  |             if len(lines) > 1: | 
					
						
							|  |  |  |  |                 formatter.newline() | 
					
						
							| 
									
										
										
										
											2012-06-02 17:56:49 -04:00
										 |  |  |  |         if self._chunks: | 
					
						
							| 
									
										
										
										
											2007-08-31 02:35:00 +00:00
										 |  |  |  |             formatter.add_transition() | 
					
						
							| 
									
										
										
										
											2011-01-09 02:35:24 +00:00
										 |  |  |  |         value = formatter._str(linesep) | 
					
						
							|  |  |  |  |         if _embeded_header.search(value): | 
					
						
							|  |  |  |  |             raise HeaderParseError("header value appears to contain " | 
					
						
							|  |  |  |  |                 "an embedded header: {!r}".format(value)) | 
					
						
							|  |  |  |  |         return value | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def _normalize(self): | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         # Step 1: Normalize the chunks so that all runs of identical charsets | 
					
						
							|  |  |  |  |         # get collapsed into a single unicode string. | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         chunks = [] | 
					
						
							|  |  |  |  |         last_charset = None | 
					
						
							|  |  |  |  |         last_chunk = [] | 
					
						
							|  |  |  |  |         for string, charset in self._chunks: | 
					
						
							|  |  |  |  |             if charset == last_charset: | 
					
						
							|  |  |  |  |                 last_chunk.append(string) | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 if last_charset is not None: | 
					
						
							|  |  |  |  |                     chunks.append((SPACE.join(last_chunk), last_charset)) | 
					
						
							|  |  |  |  |                 last_chunk = [string] | 
					
						
							|  |  |  |  |                 last_charset = charset | 
					
						
							|  |  |  |  |         if last_chunk: | 
					
						
							|  |  |  |  |             chunks.append((SPACE.join(last_chunk), last_charset)) | 
					
						
							|  |  |  |  |         self._chunks = chunks | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |  | 
					
						
							|  |  |  |  | class _ValueFormatter: | 
					
						
							|  |  |  |  |     def __init__(self, headerlen, maxlen, continuation_ws, splitchars): | 
					
						
							|  |  |  |  |         self._maxlen = maxlen | 
					
						
							|  |  |  |  |         self._continuation_ws = continuation_ws | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         self._continuation_ws_len = len(continuation_ws) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         self._splitchars = splitchars | 
					
						
							|  |  |  |  |         self._lines = [] | 
					
						
							|  |  |  |  |         self._current_line = _Accumulator(headerlen) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-23 22:19:56 +00:00
										 |  |  |  |     def _str(self, linesep): | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         self.newline() | 
					
						
							| 
									
										
										
										
											2010-10-23 22:19:56 +00:00
										 |  |  |  |         return linesep.join(self._lines) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def __str__(self): | 
					
						
							|  |  |  |  |         return self._str(NL) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def newline(self): | 
					
						
							| 
									
										
										
										
											2007-08-31 02:35:00 +00:00
										 |  |  |  |         end_of_line = self._current_line.pop() | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         if end_of_line != (' ', ''): | 
					
						
							|  |  |  |  |             self._current_line.push(*end_of_line) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         if len(self._current_line) > 0: | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |             if self._current_line.is_onlyws(): | 
					
						
							|  |  |  |  |                 self._lines[-1] += str(self._current_line) | 
					
						
							|  |  |  |  |             else: | 
					
						
							|  |  |  |  |                 self._lines.append(str(self._current_line)) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         self._current_line.reset() | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-31 02:35:00 +00:00
										 |  |  |  |     def add_transition(self): | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         self._current_line.push(' ', '') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def feed(self, fws, string, charset): | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         # If the charset has no header encoding (i.e. it is an ASCII encoding) | 
					
						
							|  |  |  |  |         # then we must split the header at the "highest level syntactic break" | 
					
						
							|  |  |  |  |         # possible. Note that we don't have a lot of smarts about field | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         # syntax; we just try to break on semi-colons, then commas, then | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         # whitespace.  Eventually, this should be pluggable. | 
					
						
							|  |  |  |  |         if charset.header_encoding is None: | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |             self._ascii_split(fws, string, self._splitchars) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             return | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         # Otherwise, we're doing either a Base64 or a quoted-printable | 
					
						
							|  |  |  |  |         # encoding which means we don't need to split the line on syntactic | 
					
						
							|  |  |  |  |         # breaks.  We can basically just find enough characters to fit on the | 
					
						
							|  |  |  |  |         # current line, minus the RFC 2047 chrome.  What makes this trickier | 
					
						
							|  |  |  |  |         # though is that we have to split at octet boundaries, not character | 
					
						
							|  |  |  |  |         # boundaries but it's only safe to split at character boundaries so at | 
					
						
							|  |  |  |  |         # best we can only get close. | 
					
						
							|  |  |  |  |         encoded_lines = charset.header_encode_lines(string, self._maxlengths()) | 
					
						
							|  |  |  |  |         # The first element extends the current line, but if it's None then | 
					
						
							|  |  |  |  |         # nothing more fit on the current line so start a new line. | 
					
						
							|  |  |  |  |         try: | 
					
						
							|  |  |  |  |             first_line = encoded_lines.pop(0) | 
					
						
							|  |  |  |  |         except IndexError: | 
					
						
							|  |  |  |  |             # There are no encoded lines, so we're done. | 
					
						
							|  |  |  |  |             return | 
					
						
							|  |  |  |  |         if first_line is not None: | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |             self._append_chunk(fws, first_line) | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         try: | 
					
						
							|  |  |  |  |             last_line = encoded_lines.pop() | 
					
						
							|  |  |  |  |         except IndexError: | 
					
						
							|  |  |  |  |             # There was only one line. | 
					
						
							|  |  |  |  |             return | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         self.newline() | 
					
						
							|  |  |  |  |         self._current_line.push(self._continuation_ws, last_line) | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |         # Everything else are full lines in themselves. | 
					
						
							|  |  |  |  |         for line in encoded_lines: | 
					
						
							|  |  |  |  |             self._lines.append(self._continuation_ws + line) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def _maxlengths(self): | 
					
						
							|  |  |  |  |         # The first line's length. | 
					
						
							|  |  |  |  |         yield self._maxlen - len(self._current_line) | 
					
						
							|  |  |  |  |         while True: | 
					
						
							|  |  |  |  |             yield self._maxlen - self._continuation_ws_len | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |     def _ascii_split(self, fws, string, splitchars): | 
					
						
							|  |  |  |  |         # The RFC 2822 header folding algorithm is simple in principle but | 
					
						
							|  |  |  |  |         # complex in practice.  Lines may be folded any place where "folding | 
					
						
							|  |  |  |  |         # white space" appears by inserting a linesep character in front of the | 
					
						
							|  |  |  |  |         # FWS.  The complication is that not all spaces or tabs qualify as FWS, | 
					
						
							|  |  |  |  |         # and we are also supposed to prefer to break at "higher level | 
					
						
							|  |  |  |  |         # syntactic breaks".  We can't do either of these without intimate | 
					
						
							|  |  |  |  |         # knowledge of the structure of structured headers, which we don't have | 
					
						
							|  |  |  |  |         # here.  So the best we can do here is prefer to break at the specified | 
					
						
							|  |  |  |  |         # splitchars, and hope that we don't choose any spaces or tabs that | 
					
						
							|  |  |  |  |         # aren't legal FWS.  (This is at least better than the old algorithm, | 
					
						
							|  |  |  |  |         # where we would sometimes *introduce* FWS after a splitchar, or the | 
					
						
							|  |  |  |  |         # algorithm before that, where we would turn all white space runs into | 
					
						
							|  |  |  |  |         # single spaces or tabs.) | 
					
						
							|  |  |  |  |         parts = re.split("(["+FWS+"]+)", fws+string) | 
					
						
							|  |  |  |  |         if parts[0]: | 
					
						
							|  |  |  |  |             parts[:0] = [''] | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             parts.pop(0) | 
					
						
							|  |  |  |  |         for fws, part in zip(*[iter(parts)]*2): | 
					
						
							|  |  |  |  |             self._append_chunk(fws, part) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def _append_chunk(self, fws, string): | 
					
						
							|  |  |  |  |         self._current_line.push(fws, string) | 
					
						
							|  |  |  |  |         if len(self._current_line) > self._maxlen: | 
					
						
							|  |  |  |  |             # Find the best split point, working backward from the end. | 
					
						
							|  |  |  |  |             # There might be none, on a long first line. | 
					
						
							|  |  |  |  |             for ch in self._splitchars: | 
					
						
							|  |  |  |  |                 for i in range(self._current_line.part_count()-1, 0, -1): | 
					
						
							|  |  |  |  |                     if ch.isspace(): | 
					
						
							|  |  |  |  |                         fws = self._current_line[i][0] | 
					
						
							|  |  |  |  |                         if fws and fws[0]==ch: | 
					
						
							|  |  |  |  |                             break | 
					
						
							|  |  |  |  |                     prevpart = self._current_line[i-1][1] | 
					
						
							|  |  |  |  |                     if prevpart and prevpart[-1]==ch: | 
					
						
							|  |  |  |  |                         break | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |                 else: | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |                     continue | 
					
						
							|  |  |  |  |                 break | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |                 fws, part = self._current_line.pop() | 
					
						
							|  |  |  |  |                 if self._current_line._initial_size > 0: | 
					
						
							|  |  |  |  |                     # There will be a header, so leave it on a line by itself. | 
					
						
							|  |  |  |  |                     self.newline() | 
					
						
							|  |  |  |  |                     if not fws: | 
					
						
							|  |  |  |  |                         # We don't use continuation_ws here because the whitespace | 
					
						
							|  |  |  |  |                         # after a header should always be a space. | 
					
						
							|  |  |  |  |                         fws = ' ' | 
					
						
							|  |  |  |  |                 self._current_line.push(fws, part) | 
					
						
							|  |  |  |  |                 return | 
					
						
							|  |  |  |  |             remainder = self._current_line.pop_from(i) | 
					
						
							|  |  |  |  |             self._lines.append(str(self._current_line)) | 
					
						
							|  |  |  |  |             self._current_line.reset(remainder) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  | class _Accumulator(list): | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |     def __init__(self, initial_size=0): | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         self._initial_size = initial_size | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |     def push(self, fws, string): | 
					
						
							|  |  |  |  |         self.append((fws, string)) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def pop_from(self, i=0): | 
					
						
							|  |  |  |  |         popped = self[i:] | 
					
						
							|  |  |  |  |         self[i:] = [] | 
					
						
							|  |  |  |  |         return popped | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def pop(self): | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         if self.part_count()==0: | 
					
						
							|  |  |  |  |             return ('', '') | 
					
						
							|  |  |  |  |         return super().pop() | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def __len__(self): | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         return sum((len(fws)+len(part) for fws, part in self), | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  |                    self._initial_size) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def __str__(self): | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         return EMPTYSTRING.join((EMPTYSTRING.join((fws, part)) | 
					
						
							|  |  |  |  |                                 for fws, part in self)) | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |     def reset(self, startval=None): | 
					
						
							|  |  |  |  |         if startval is None: | 
					
						
							|  |  |  |  |             startval = [] | 
					
						
							|  |  |  |  |         self[:] = startval | 
					
						
							| 
									
										
										
										
											2007-08-30 01:15:14 +00:00
										 |  |  |  |         self._initial_size = 0 | 
					
						
							| 
									
										
										
										
											2007-08-30 03:46:43 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def is_onlyws(self): | 
					
						
							| 
									
										
										
										
											2011-04-18 10:04:34 -04:00
										 |  |  |  |         return self._initial_size==0 and (not self or str(self).isspace()) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def part_count(self): | 
					
						
							|  |  |  |  |         return super().__len__() |